Created
August 12, 2016 19:29
-
-
Save grahamanderson/0450ef1072026559bdacbd198f58f4d0 to your computer and use it in GitHub Desktop.
projects/dnc_leaks/DNC-Modeling.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": false | |
| }, | |
| "cell_type": "code", | |
| "source": "import matplotlib.pyplot as plt\n%matplotlib inline\nimport pandas as pd\nimport re\nimport numpy as np\nimport pickle\nimport nltk\n\nstopwords = nltk.corpus.stopwords.words('english')\n# load nltk's SnowballStemmer as variable 'stemmer'\nfrom nltk.stem.snowball import SnowballStemmer\nstemmer = SnowballStemmer(\"english\")\n\n", | |
| "execution_count": 3, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": true | |
| }, | |
| "cell_type": "code", | |
| "source": "df = pickle.load( open( \"dnc_df_no_everything.pkl\", \"rb\" ) )", | |
| "execution_count": 4, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": true | |
| }, | |
| "cell_type": "code", | |
| "source": "# Helper Functions\n# here I define a tokenizer and stemmer which returns the set of stems in the text that it is passed\n\n\ndef getStems(text):\n stemmmer = nltk.stem.porter.PorterStemmer()\n return [stemmer.stem(x) for x in getTokens(text)]\n\n# def get\n# not in stopwords add it into an array\n\ndef tokenize_and_stem(text):\n # first tokenize by sentence, then by word to ensure that punctuation is caught as its own token\n tokens = [word for sent in nltk.sent_tokenize(text) for word in nltk.word_tokenize(sent)]\n filtered_tokens = []\n # filter out any tokens not containing letters (e.g., numeric tokens, raw punctuation)\n for token in tokens:\n if re.search('[a-zA-Z]', token):\n filtered_tokens.append(token)\n stems = [stemmer.stem(t) for t in filtered_tokens]\n return stems\n\n\ndef tokenize_only(text):\n # first tokenize by sentence, then by word to ensure that punctuation is caught as its own token\n tokens = [word.lower() for sent in nltk.sent_tokenize(text) for word in nltk.word_tokenize(sent)]\n filtered_tokens = []\n # filter out any tokens not containing letters (e.g., numeric tokens, raw punctuation)\n for token in tokens:\n if re.search('[a-zA-Z]', token):\n filtered_tokens.append(token)\n return filtered_tokens", | |
| "execution_count": 5, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": false | |
| }, | |
| "cell_type": "code", | |
| "source": "from sklearn.feature_extraction.text import TfidfVectorizer\n\n#define vectorizer parameters\ntfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.8, max_features=200000, \n min_df=0.2, use_idf=True, ngram_range=(1,3))\n# max_df=0.8, max_features=200000,\n# min_df=0.2, stop_words='english',\n# use_idf=True, tokenizer=tokenize_and_stem, ngram_range=(1,3))\n\n\n\ntfidf_matrix = tfidf_vectorizer.fit_transform(df.body) #fit the vectorizer to the email bodies\n\nprint(tfidf_matrix.shape)", | |
| "execution_count": 11, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": "(12286, 24)\n", | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": true | |
| }, | |
| "cell_type": "code", | |
| "source": "X3 = tfidf_matrix", | |
| "execution_count": 13, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": false | |
| }, | |
| "cell_type": "code", | |
| "source": "from sklearn.cluster import KMeans", | |
| "execution_count": 12, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": true | |
| }, | |
| "cell_type": "code", | |
| "source": "c = 5\nmodelKM = KMeans(n_clusters = c)\nmodelKM.fit(X3)\n\nclusters = modelKM.labels_\n", | |
| "execution_count": 15, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": false | |
| }, | |
| "cell_type": "code", | |
| "source": "df.head()", | |
| "execution_count": 23, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>date</th>\n <th>from_name</th>\n <th>from_email</th>\n <th>to</th>\n <th>subject</th>\n <th>body</th>\n <th>cluster</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Tue, 17 May 2016 19:51:22 -0700</td>\n <td>Maureen Garde</td>\n <td>GardeM@dnc.org</td>\n <td>\"Davis, Marilyn\" <DavisM@dnc.org></td>\n <td>Re: CT To Automatically Register 400,000 Voters</td>\n <td>how many more states can we get to follow conn...</td>\n <td>4</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Mon, 2 May 2016 22:19:09 -0400</td>\n <td>Contribution</td>\n <td>postmaster@finance.democrats.org</td>\n <td><kaplanj@dnc.org>, <comers@dnc.org>, <olszewsk...</td>\n <td>Contribution: DE008 - DWS WLF Reception / Shek...</td>\n <td>contribution data page de...</td>\n <td>2</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Wed, 27 Apr 2016 03:48:42 -0600</td>\n <td>POLITICO</td>\n <td>email@politicoemail.com</td>\n <td><kaplanj@dnc.org></td>\n <td>POLITICO's Daily Congress Digest for Wednesday...</td>\n <td>politicos daily congress digest for wednesday ...</td>\n <td>4</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Thu, 19 May 2016 08:42:10 -0700</td>\n <td>Corinne Matti</td>\n <td>MattiC@dnc.org</td>\n <td>\"Miranda, Luis\" <MirandaL@dnc.org></td>\n <td>Re: America's Newsroom (FNC) - Luis Miranda (S...</td>\n <td>yup they also have iq media and snapstreamon m...</td>\n <td>4</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Fri, 20 May 2016 11:30:27 -0700</td>\n <td>Brinster Jeremy</td>\n <td>BrinsterJ@dnc.org</td>\n <td>\"Walker, Eric\" <WalkerE@dnc.org>, \"Wei, Shu-Ye...</td>\n <td>RE: For Comms Approval: Round up of Trump-Chri...</td>\n <td>looks good please dont forgot to change the su...</td>\n <td>3</td>\n </tr>\n </tbody>\n</table>\n</div>", | |
| "text/plain": " date from_name \\\n0 Tue, 17 May 2016 19:51:22 -0700 Maureen Garde \n1 Mon, 2 May 2016 22:19:09 -0400 Contribution \n2 Wed, 27 Apr 2016 03:48:42 -0600 POLITICO \n3 Thu, 19 May 2016 08:42:10 -0700 Corinne Matti \n4 Fri, 20 May 2016 11:30:27 -0700 Brinster Jeremy \n\n from_email \\\n0 GardeM@dnc.org \n1 postmaster@finance.democrats.org \n2 email@politicoemail.com \n3 MattiC@dnc.org \n4 BrinsterJ@dnc.org \n\n to \\\n0 \"Davis, Marilyn\" <DavisM@dnc.org> \n1 <kaplanj@dnc.org>, <comers@dnc.org>, <olszewsk... \n2 <kaplanj@dnc.org> \n3 \"Miranda, Luis\" <MirandaL@dnc.org> \n4 \"Walker, Eric\" <WalkerE@dnc.org>, \"Wei, Shu-Ye... \n\n subject \\\n0 Re: CT To Automatically Register 400,000 Voters \n1 Contribution: DE008 - DWS WLF Reception / Shek... \n2 POLITICO's Daily Congress Digest for Wednesday... \n3 Re: America's Newsroom (FNC) - Luis Miranda (S... \n4 RE: For Comms Approval: Round up of Trump-Chri... \n\n body cluster \n0 how many more states can we get to follow conn... 4 \n1 contribution data page de... 2 \n2 politicos daily congress digest for wednesday ... 4 \n3 yup they also have iq media and snapstreamon m... 4 \n4 looks good please dont forgot to change the su... 3 " | |
| }, | |
| "metadata": {}, | |
| "execution_count": 23 | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": false | |
| }, | |
| "cell_type": "code", | |
| "source": "df['tokens'] = tfidf_vectorizer.get_feature_names()\ndf['cluster']= clusters", | |
| "execution_count": 22, | |
| "outputs": [ | |
| { | |
| "output_type": "error", | |
| "ename": "ValueError", | |
| "evalue": "Length of values does not match length of index", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-22-3a749e7f6577>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'tokens'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtfidf_vectorizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_feature_names\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cluster'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mclusters\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m//anaconda/lib/python3.5/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__setitem__\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 2355\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2356\u001b[0m \u001b[0;31m# set column\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2357\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_item\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2358\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2359\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_setitem_slice\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m//anaconda/lib/python3.5/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_set_item\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 2421\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2422\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_ensure_valid_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2423\u001b[0;31m \u001b[0mvalue\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sanitize_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2424\u001b[0m \u001b[0mNDFrame\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_item\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2425\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m//anaconda/lib/python3.5/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_sanitize_column\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 2576\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2577\u001b[0m \u001b[0;31m# turn me into an ndarray\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2578\u001b[0;31m \u001b[0mvalue\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_sanitize_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2579\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mIndex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2580\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m//anaconda/lib/python3.5/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m_sanitize_index\u001b[0;34m(data, index, copy)\u001b[0m\n\u001b[1;32m 2768\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2769\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2770\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Length of values does not match length of '\u001b[0m \u001b[0;34m'index'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2771\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2772\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mPeriodIndex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;31mValueError\u001b[0m: Length of values does not match length of index" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": false | |
| }, | |
| "cell_type": "code", | |
| "source": "df.head()", | |
| "execution_count": 19, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>date</th>\n <th>from_name</th>\n <th>from_email</th>\n <th>to</th>\n <th>subject</th>\n <th>body</th>\n <th>cluster</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Tue, 17 May 2016 19:51:22 -0700</td>\n <td>Maureen Garde</td>\n <td>GardeM@dnc.org</td>\n <td>\"Davis, Marilyn\" <DavisM@dnc.org></td>\n <td>Re: CT To Automatically Register 400,000 Voters</td>\n <td>how many more states can we get to follow conn...</td>\n <td>4</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Mon, 2 May 2016 22:19:09 -0400</td>\n <td>Contribution</td>\n <td>postmaster@finance.democrats.org</td>\n <td><kaplanj@dnc.org>, <comers@dnc.org>, <olszewsk...</td>\n <td>Contribution: DE008 - DWS WLF Reception / Shek...</td>\n <td>contribution data page de...</td>\n <td>2</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Wed, 27 Apr 2016 03:48:42 -0600</td>\n <td>POLITICO</td>\n <td>email@politicoemail.com</td>\n <td><kaplanj@dnc.org></td>\n <td>POLITICO's Daily Congress Digest for Wednesday...</td>\n <td>politicos daily congress digest for wednesday ...</td>\n <td>4</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Thu, 19 May 2016 08:42:10 -0700</td>\n <td>Corinne Matti</td>\n <td>MattiC@dnc.org</td>\n <td>\"Miranda, Luis\" <MirandaL@dnc.org></td>\n <td>Re: America's Newsroom (FNC) - Luis Miranda (S...</td>\n <td>yup they also have iq media and snapstreamon m...</td>\n <td>4</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Fri, 20 May 2016 11:30:27 -0700</td>\n <td>Brinster Jeremy</td>\n <td>BrinsterJ@dnc.org</td>\n <td>\"Walker, Eric\" <WalkerE@dnc.org>, \"Wei, Shu-Ye...</td>\n <td>RE: For Comms Approval: Round up of Trump-Chri...</td>\n <td>looks good please dont forgot to change the su...</td>\n <td>3</td>\n </tr>\n </tbody>\n</table>\n</div>", | |
| "text/plain": " date from_name \\\n0 Tue, 17 May 2016 19:51:22 -0700 Maureen Garde \n1 Mon, 2 May 2016 22:19:09 -0400 Contribution \n2 Wed, 27 Apr 2016 03:48:42 -0600 POLITICO \n3 Thu, 19 May 2016 08:42:10 -0700 Corinne Matti \n4 Fri, 20 May 2016 11:30:27 -0700 Brinster Jeremy \n\n from_email \\\n0 GardeM@dnc.org \n1 postmaster@finance.democrats.org \n2 email@politicoemail.com \n3 MattiC@dnc.org \n4 BrinsterJ@dnc.org \n\n to \\\n0 \"Davis, Marilyn\" <DavisM@dnc.org> \n1 <kaplanj@dnc.org>, <comers@dnc.org>, <olszewsk... \n2 <kaplanj@dnc.org> \n3 \"Miranda, Luis\" <MirandaL@dnc.org> \n4 \"Walker, Eric\" <WalkerE@dnc.org>, \"Wei, Shu-Ye... \n\n subject \\\n0 Re: CT To Automatically Register 400,000 Voters \n1 Contribution: DE008 - DWS WLF Reception / Shek... \n2 POLITICO's Daily Congress Digest for Wednesday... \n3 Re: America's Newsroom (FNC) - Luis Miranda (S... \n4 RE: For Comms Approval: Round up of Trump-Chri... \n\n body cluster \n0 how many more states can we get to follow conn... 4 \n1 contribution data page de... 2 \n2 politicos daily congress digest for wednesday ... 4 \n3 yup they also have iq media and snapstreamon m... 4 \n4 looks good please dont forgot to change the su... 3 " | |
| }, | |
| "metadata": {}, | |
| "execution_count": 19 | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": false | |
| }, | |
| "cell_type": "code", | |
| "source": "pd.Series(df.body[0].split(\" \")).value_counts()", | |
| "execution_count": 21, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": "the 36\nto 23\n 21\na 15\nof 14\nand 9\nconnecticut 9\nstate 8\nin 8\nis 7\nvoters 7\nwere 6\ndmv 5\nstates 5\nthat 5\nregister 5\nautomatic 5\nmay 5\nvoter 4\nwill 4\nover 4\nat 4\non 4\nthis 4\nit 4\nthey 4\nby 4\nfrom 4\nregistration 4\nautomatically 4\n ..\nvisit 1\nfollow 1\nauthorized 1\nhas 1\nshould 1\nth 1\ngo 1\nwhile 1\nregarding 1\nexpansiondemocratic 1\ncloser 1\nwroteconnecticut 1\nbefore 1\nbig 1\ndesignated 1\nunderstanding 1\ncollect 1\npratt 1\nprimaryhttpthinkprogressorgpoliticsoregonclosedprimary 1\nmajority 1\npmon 1\nfuture 1\nreally 1\noffering 1\nmarilyn 1\niphoneon 1\nhoping 1\ntwitter 1\nawesomesent 1\nproblem 1\ndtype: int64" | |
| }, | |
| "metadata": {}, | |
| "execution_count": 21 | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": true | |
| }, | |
| "cell_type": "code", | |
| "source": "", | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": true | |
| }, | |
| "cell_type": "code", | |
| "source": "", | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": true | |
| }, | |
| "cell_type": "code", | |
| "source": "", | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": true | |
| }, | |
| "cell_type": "code", | |
| "source": "", | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": true | |
| }, | |
| "cell_type": "code", | |
| "source": "", | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": true | |
| }, | |
| "cell_type": "code", | |
| "source": "", | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": false | |
| }, | |
| "cell_type": "code", | |
| "source": "from sklearn.decomposition import PCA\nfrom sklearn.decomposition import TruncatedSVD\ntsvd = TruncatedSVD(10)\ntsvd_data = tsvd.fit_transform(X)", | |
| "execution_count": 32, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": false | |
| }, | |
| "cell_type": "code", | |
| "source": "tsvd_data", | |
| "execution_count": 15, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "execution_count": 15, | |
| "data": { | |
| "text/plain": "array([[ 4.81792362e+01, -2.08029647e+00],\n [ 1.10853647e+00, 3.25939466e-02],\n [ 1.34308897e+01, 2.30990310e-01],\n ..., \n [ 2.07420533e+01, 1.15874045e-01],\n [ 3.26191232e+00, 1.11195556e+00],\n [ 5.41718397e+00, 9.67399758e-01]])" | |
| }, | |
| "output_type": "execute_result" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": false | |
| }, | |
| "cell_type": "code", | |
| "source": "tsvd.explained_variance_ratio_", | |
| "execution_count": 23, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "execution_count": 23, | |
| "data": { | |
| "text/plain": "array([ 0.75159268, 0.06964022, 0.02216457, 0.01992867, 0.01345506,\n 0.01144528])" | |
| }, | |
| "output_type": "execute_result" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": false | |
| }, | |
| "cell_type": "code", | |
| "source": "pd.DataFrame(np.cumsum(tsvd.explained_variance_ratio_)).plot()\nplt.legend(['Variance'])", | |
| "execution_count": 34, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "execution_count": 34, | |
| "data": { | |
| "text/plain": "<matplotlib.legend.Legend at 0x13d79e668>" | |
| }, | |
| "output_type": "execute_result" | |
| }, | |
| { | |
| "metadata": {}, | |
| "data": { | |
| "text/plain": "<matplotlib.figure.Figure at 0x13d79e5c0>", | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEACAYAAAC9Gb03AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xt4VOW59/HvHQEVKYhA1QJGeSl4qH3FKqJ4GMRKsC2g\nrRWrgFoRq+5u6wnr1hLdrZco8vpad3dLtYCipd1YCLQgAWRQWoGoKKCJ8QQiYAsFqhGBkNz7jzUh\nQ0iYCZlkzeH3ua65MrNOcyeE36w863meZe6OiIhkr7ywCxARkealoBcRyXIKehGRLKegFxHJcgp6\nEZEsp6AXEclySQW9mRWYWZmZlZvZ2HrW32FmK83sdTNbbWZ7zOzIZPYVEZHmZYn60ZtZHlAODAQ2\nAiXAcHcva2D7bwO3uvtFjd1XRERSL5kz+r7Au+6+zt0rgenA0ANsfyXw+4PcV0REUiyZoO8KrI97\n/XFs2X7M7HCgAHi+sfuKiEjzSPXF2O8AS919e4qPKyIiB6lVEttsAI6Le90ttqw+w6lttmnUvmam\nSXdERBrJ3S3RNsmc0ZcAPc0s38zaEIT57LobmVkH4AKgqLH7xhWcVo9x48aFXoNqyp6a0rUu1ZS5\nNSUr4Rm9u1eZ2S1AMcEHw1PuXmpmY4LVPim26TBgvrt/kWjfpKsTEZEmS6bpBnd/AehdZ9lv6rye\nCkxNZl8REWk5Ghl7AJFIJOwS9qOakpOONUF61qWakpOONSUr4YCplmJmni61iIhkAjPDk7gYm1TT\njYjktuOPP55169aFXUbOys/PZ+3atQe9v87oRSSh2Jlj2GXkrIZ+/sme0auNXkQkyynoRUSynIJe\nRCTLKehFROKsX7+e9u3bZ9U1CQW9iGS0wYMHU1hYuN/yoqIijj32WKqrqxt1vO7du/Ppp59ilvAa\nZ8ZQ0ItIRhs1ahTTpk3bb/m0adMYMWIEeXnJx1xVVVUqS0sbCnoRyWjDhg3jn//8J0uXLt27bPv2\n7fz5z39mxIgRzJ07l9NPP50OHTqQn5/P/fffv3e7devWkZeXx+9+9zvy8/MZOHDg3mU1fwlMmTKF\nk08+mfbt29OzZ08mTZq0d/8lS5bQvXt3Jk6cyNFHH03Xrl2ZMmXK3vU7d+7k9ttv5/jjj6djx46c\nf/757Nq1C4Bly5bRv39/OnbsSJ8+fViyZEnz/ZDCnn0tbhY2F5H0lO7/P0ePHu2jR4/e+/rXv/61\n9+nTx93do9Gor1mzxt3dV69e7cccc4wXFRW5u/vatWvdzHzUqFG+Y8cO37lzp69du9bz8vK8qqrK\n3d3nzp3rH374obu7v/TSS962bVtfuXLl3mO3atXKCwsLfc+ePT537lxv27atb9++3d3db7rpJh8w\nYIBv2rTJq6ur/ZVXXvHdu3f7hg0bvFOnTv7CCy+4u/vChQu9U6dOvmXLlnq/v4Z+/rHlifM1mY1a\n4pHuv0giuSzR/09IzeNgLV261I888kjftWuXu7v379/fH3vssXq3vfXWW/22225zd98b6mvXrt27\nvm7Q1zVs2DB//PHH3T0I+rZt2+6z7Ze//GVfvny5V1dX++GHH+6rV6/e7xjjx4/3kSNH7rNs0KBB\n/vTTT9f7nk0Nek2BICJNFnYHlf79+9OlSxdmzZrFGWecQUlJCTNnzgRg+fLl/PSnP2XNmjXs3r2b\n3bt3c/nll++zf7du3Ro89rx583jggQcoLy+nurqaL774gq9//et713fq1Gmf6wBt27aloqKCLVu2\nsGvXLnr06LHfMdetW8cf//hH5syZAwQn3Hv27OHCCy9ssI5Fi2D9+n0fyVIbvYhkhREjRjB16lSm\nTZvGoEGD6NKlCwBXXXUVw4YNY8OGDWzfvp0xY8bs13WyoR42u3fv5nvf+x533XUXmzdvZtu2bQwe\nPDiprpedO3fmsMMO4/33399vXffu3Rk5ci
Rbt25l69atbNu2jc8++4y77rqrweP9538GYf/FF9Cn\nD9x8c8IS9lLQi0hWGDlyJAsXLuTJJ59k1KhRe5dXVFTQsWNHWrduzYoVK3juuef22a++0K5ZVvMX\nQOfOncnLy2PevHkUFxcnVY+Zce2113LbbbexadMmqqurWbZsGZWVlVx99dXMmTOH4uJiqqur2blz\nJ0uWLGHjxo0NHi8ahWeegQcfhB/9CL797aTKABT0IpIl8vPzOeecc9ixYwdDhgzZu/xXv/oV9913\nHx06dODnP/85V1xxxT771Xc2X7OsXbt2PP7441x++eUcddRRTJ8+naFDhx6wjvjjTZgwgVNPPZUz\nzzyTTp06cffdd1NdXU23bt0oKiriwQcfpEuXLuTn5zNhwoRG9/lPlmavFJGENHtluDR7pYiIHJCC\nXkQkyynoRUSynIJeRCTLKehFRLJcUkFvZgVmVmZm5WY2toFtIma20szWmNniuOVrzezN2LoVqSpc\nRESSk3AKBDPLA54ABgIbgRIzK3L3srhtOgD/BVzs7hvMrHPcIaqBiLtvS23pItJS8vPzs2p+9kyT\nn5/fpP2TmeumL/Cuu68DMLPpwFCgLG6bHwDPu/sGAHffErfOUBORSEZbu3YtABUVUF4OZWX7Pt57\nDzp3hhNP3P9x7LGgz4hwJRP0XYH46XM+Jgj/eL2A1rEmm3bA4+7+TGydAwvMrAqY5O6/bWLNItKM\n3GHTpv3DvKwMtmyBr361NsSHDQu+9uoF7dqFXbk0JFWzV7YCTgcuBI4AXjGzV9z9PaC/u28ysy4E\ngV/q7kvrO0j87cAikQiRSCRF5YlIXe7w7ruwZs3+gX744fuelQ8eHHw97jg45JCwK89d0WiUaDTa\n6P0SToFgZv2AQncviL2+m2AO5PFx24wFDnP3+2OvnwTmufvzdY41DvjM3SfW8z6aAkGkGVVXB6G+\nZAm89FLwOPRQOP30IMR79679etRRYVcryUh2CoRkgv4Q4B2Ci7GbgBXAle5eGrfNicAvgQLgUGA5\ncAWwFshz9wozOwIoBu539/2mf1PQi6RWZSWsXFkb6kuXQpcucP75tY8mXuOTkCUb9Ambbty9ysxu\nIQjpPOApdy81szHBap/k7mVmNh9YBdS0xb9tZicAM83MY+/1bH0hLyJNt3MnrFhRG+zLlsEJJwSB\nPmIETJoExxwTdpUSBs1eKZKhKirglVdqg/211+CUU2rP1vv3VxNMtktZ001LUdCLHNi2bUHzS02w\nv/VW0L5eE+xnnw1f+lLYVUpLUtCLZLhPPoGXX64N9g8/hH79aoO9b1847LCwq5QwKehFMsxHH9WG\n+pIlsHkznHtubbD36QOtW4ddpaQTBb1IGqvpw14T7C+9FNz0Ob5HzNe+pj7rcmAKepE0Ul0dtKnH\n92Fv0wYuuKA22Hv10lQB0jgKepEQucMHH8CiRcFj8WJo3x4ikdpwVx92aSoFvUgL27QJXnyxNtwr\nK2HgwOBx4YXB9AEiqaSgF2lm27YFTTE1wf7JJ8EZe0249+6tphhpXgp6kRTbsQP++tfaYC8rg3PO\nqQ32007TxVNpWQp6kSaqrISSktpgf/XVIMxrgv2ss4JJwUTCoqAXaaTqali1qradfelS6NGjNtjP\nO09zrkt6UdCLJOAe3Blp0aIg3Bcvho4da4M9EgnumiSSrhT0IvXYuLE22BctgqqqfXvGdO8edoUi\nyVPQiwBbt0I0Whvs//gHDBgQhPrAgRqkJJlNQS85yR3eeAOmTw+Cvbw8mK63JthPOw3ydKt6yRIK\neskp//gHPPssTJkC//oXXH01DBoU9Ixp0ybs6kSah4Jest7u3TB3LkyeHAxcGjoUrrkmmGJAZ+2S\nC1J2K0GRdPPGG8GZ+3PPBTezvvZamDZNN90QaYiCXjLC5s1BsE+ZElxgHTUK/vY36Nkz7MpE0p+a\nbiRtVV
bCvHlB08zixTBkSNA0E4moaUYE1EYvGWzVquDM/dlng+6P11wDl18eTPMrIrXURi8ZZcsW\n+P3vg4DfvBlGjgymIPjqV8OuTCTz6YxeQlNZCS+8EIT7okXw7W8HZ+8DBmgWSJFkJHtGn1RLp5kV\nmFmZmZWb2dgGtomY2UozW2Nmixuzr+SWNWvgjjuC6QYeeggKCmDduqDnzEUXKeRFUi3hGb2Z5QHl\nwEBgI1ACDHf3srhtOgB/Ay529w1m1tndtySzb9wxdEafxbZuDZpmJk8ObtAxalTw6NUr7MpEMlcq\n2+j7Au+6+7rYgacDQ4H4sP4B8Ly7bwBw9y2N2Fey1J49MH9+0DSzYAFccgk8+GAwFYHO2kVaTjJB\n3xVYH/f6Y4IAj9cLaB1rsmkHPO7uzyS5r2SZt98Owv2ZZ+D444N299/+Fo48MuTCRHJUqnrdtAJO\nBy4EjgBeMbNXGnuQwsLCvc8jkQiRSCRF5Ulz27YtmEhs8mTYsCHoNbN4cTByVURSIxqNEo1GG71f\nMm30/YBCdy+Ivb4bcHcfH7fNWOAwd78/9vpJYB6wIdG+ccdQG30GevVVeOSRoImmoCA4e//mN9U0\nI9ISUtnrpgToaWb5ZtYGGA7MrrNNEXCumR1iZm2Bs4DSJPeVDLR+PYwYEYxWPfdc+PDD4Iy+oEAh\nL5JuEjbduHuVmd0CFBN8MDzl7qVmNiZY7ZPcvczM5gOrgCpgkru/DVDfvs31zUjzq6iA8ePhV7+C\nm26Cd97RZGIi6U4DpiQpVVXBBdb77gt6zTz4oG67JxI2TYEgKbNwIdx+ezDXTFERnHlm2BWJSGMo\n6KVBZWVw551Bd8mHH4bLLtP9VUUykSZ7lf1s2QK33ALnnRfMO/P22/Dd7yrkRTKVgl722rULJkyA\nk04K5nsvLYXbboNDDw27MhFpCjXdCO7w/PMwdiyccgq8/LIGOolkEwV9jluxIjhrr6iASZOCHjUi\nkl3UdJOjPvoIrr4ahg2D666D115TyItkKwV9jvnsM7j3XujTB3r0gPLyIOg1mlUkeynoc0RVVTCD\nZO/ewdn8G2/AAw9Au3ZhVyYizU1t9DlgwYJgwNORR8Ls2XDGGWFXJCItSUGfxUpLg1v2vfNOMODp\n0kvVF14kF6npJgtt3gw33wznnx9cYH3rLY1qFcllCvossmtXMDf8SSdBq1bBFAYa8CQiarrJAu4w\nY0Yw4OnUU+Gvfw0uuoqIgII+4y1fHpy1f/45PPkkXHhh2BWJSLpR002G+ugjuOqqoO39+uuDAU8K\neRGpj4I+w3z6KdxzTzDgqWfPoEfNtddqwJOINExNNxmkqAhuvBEuvhjefBO6dQu7IhHJBAr6DDF/\nPoweHYT92WeHXY2IZBLdMzYDLFsG3/kOzJwJ554bdjUiki6SvWes2ujT3FtvwdChMHWqQl5EDo6C\nPo2tXQsFBTBxIlxySdjViEimUtCnqb//PbjoeuedQTdKEZGDlVTQm1mBmZWZWbmZja1n/QVmtt3M\nXo897o1bt9bM3jSzlWa2IpXFZ6t//QsGD4Yrr4Qf/zjsakQk0yXsdWNmecATwEBgI1BiZkXuXlZn\n05fcfUg9h6gGIu6+rcnV5oAvvoAhQ4KeNYWFYVcjItkgmTP6vsC77r7O3SuB6cDQerZr6MqvJfk+\nOW/PHhg+HL7yFfjlLzXbpIikRjIB3BVYH/f649iyus42szfM7C9mdnLccgcWmFmJmY1uQq1Zrbo6\nmMpg166gh02ePhpFJEVSNWDqNeA4d99hZoOBWUCv2Lr+7r7JzLoQBH6puy+t7yCFcW0VkUiESCSS\novLSm3tw0fWdd2DhQmjTJuyKRCQdRaNRotFoo/dLOGDKzPoBhe5eEHt9N+DuPv4A+3wIfMPdt9ZZ\nPg74zN0n1rNPzg6YeughmDYNXnoJjjoq7GpEJFOkcsBUCdDTzPLNrA0w
HJhd582Ojnvel+ADZKuZ\ntTWzdrHlRwAXA2sa8X1kvd/+Fn7zm2CKA4W8iDSHhE037l5lZrcAxQQfDE+5e6mZjQlW+yTge2b2\nI6AS+AK4Irb70cBMM/PYez3r7sXN8Y1kohkzYNw4WLIEutZ31UNEJAU0101IFi6EH/wgOJPv0yfs\nakQkEyXbdKPZK0OwYkUwGOr55xXyItL81ImvhZWWBgOinnoKzj8/7GpEJBco6FvQRx/BoEHw8MNB\n2IuItAQFfQvZvDmYpOwnP4GRI8OuRkRyiS7GtoDPPoMBA4Kz+V/8IuxqRCRbJHsxVkHfzHbuhG99\nK7iR969/rflrRCR1FPRpYM8e+P734ZBDYPr04KuISKqoe2XI3OHGG4Nmmz//WSEvIuFR0DeTe+6B\n1ath0SI49NCwqxGRXKagbwYTJkBRUTBJWbt2YVcjIrlOQZ9ikyfDE0/A0qXQuXPY1YiIKOhTatas\noMkmGoVu3cKuRkQkoKBPkWgUbrgB5s2D3r3DrkZEpJZGxqbA668H3Sj/8Af4xjfCrkZEZF8K+iYq\nLw8GRP3mN8HoVxGRdKOgb4KPPw7mr/nFL+DSS8OuRkSkfgr6g/TPfwZz19x8M1x3XdjViIg0TFMg\nHISKCrjoIrjgAhjf4C3SRUSal+a6aSa7dsF3vgPHHRfc2FuTlIlIWBT0zaCqKrjPa2Ul/PGP0Eqd\nU0UkRJrULMXcg/b4zZth7lyFvIhkDsVVkn72M3j1VXjxRTjssLCrERFJnoI+CY89FjTVLF0K7duH\nXY2ISOMk1b3SzArMrMzMys1sbD3rLzCz7Wb2euxxb7L7prtnnoGJE2HBAujSJexqREQaL+HFWDPL\nA8qBgcBGoAQY7u5lcdtcANzu7kMau2/ctml3MXbOHBg9GhYvhpNOCrsaEZF9JXsxNpkz+r7Au+6+\nzt0rgenA0Preswn7pp2XX4Yf/hBmz1bIi0hmSybouwLr415/HFtW19lm9oaZ/cXMTm7kvmllx46g\nG+XTT0PfvmFXIyLSNKm6GPsacJy77zCzwcAsoFdjD1JYWLj3eSQSIRKJpKi8xpk4Ec45BwoKQnl7\nEZF6RaNRotFoo/dLpo2+H1Do7gWx13cD7u4NDv43sw+BbxCEfVL7pksb/SefwCmnQEkJ9OgRdjUi\nIg1LZRt9CdDTzPLNrA0wHJhd582Ojnvel+ADZGsy+6abwkK45hqFvIhkj4RNN+5eZWa3AMUEHwxP\nuXupmY0JVvsk4Htm9iOgEvgCuOJA+zbT99Jkb70Ff/oTvPNO2JWIiKSO5rqJ861vwTe/CbfeGmoZ\nIiJJ0Vw3jbRwYXAmP3Nm2JWIiKSWbjxCMCvlHXcEc8u3aRN2NSIiqaWgJ5jm4Igj4LLLwq5ERCT1\ncr6NfscO6NULZsyAfv1a/O1FRA5aKrtXZrVHH4Vzz1XIi0j2yukz+prBUa++Ciec0KJvLSLSZLqV\nYBLGjIEvfQkmTGjRtxURSQl1r0xgzZqgK6UGR4lItsvZNvq77oL/+A/o2DHsSkREmldOntEvWADl\n5TBrVtiViIg0v5w7o9fgKBHJNTkX9E8/HVyA1eAoEckVOdXr5vPPoXdveP55OOusZn0rEZFmpwFT\n9Zg4MRgcpZAXkVySM2f0mzbB176mwVEikj00YKqOG26ADh3gkUea7S1ERFqUBkzFWbMGioqgrCzs\nSkREWl5OtNHfeacGR4lI7sr6oC8uhvfegxtvDLsSEZFwZHXQV1UFZ/MaHCUiuSyrg37qVGjfHi69\nNOxKRETCk7W9bj7/PLhz1J/+pH7zIpKdcn7A1KOPwvnnK+RFRJIKejMrMLMyMys3s7EH2O5MM6s0\ns8vilq01szfNbKWZrUhF0Yls2gSPPw4PPtgS7yYikt4S9qM3szzgCWAgsBEoMbMidy+rZ7uHgPl1\nDlENRNx9W2pKTuxnP4PrrtMIWBER
SG7AVF/gXXdfB2Bm04GhQN3hR/8GzADOrLPcaMEmotWrYfZs\n3TlKRKRGMgHcFVgf9/rj2LK9zOwrwDB3/2+CYI/nwAIzKzGz0U0pNhk1d4468sjmficRkcyQqikQ\nHgPi2+7jw76/u28ysy4EgV/q7kvrO0hhYeHe55FIhEgk0qgiiovh/fc1OEpEslM0GiUajTZ6v4Td\nK82sH1Do7gWx13cD7u7j47b5oOYp0Bn4HLjB3WfXOdY44DN3n1jP+zSpe2VVFfTpA/ffr37zIpIb\nUtm9sgToaWb5ZtYGGA7sE+Du3iP2OIGgnf4md59tZm3NrF2soCOAi4E1jf1mkjF1atBcM2xYcxxd\nRCRzJWy6cfcqM7sFKCb4YHjK3UvNbEyw2ifV3SXu+dHATDPz2Hs96+7FKap9r4oKuO8+mDkTLOFn\nm4hIbsmKkbH33x/0snnuuRQXJSKSxnLmxiM1d4567TU4/vjU1yUikq5yJuivvx6OOgoefrgZihIR\nSWM5cYepVatgzhwNjhIROZCMntTsrrvg3ns1OEpE5EAyNujnz4cPPoAxY8KuREQkvWVk0FdVwR13\nBO3yunOUiMiBZWTQT5kS3Oh76NCwKxERSX8Z1+umogJ694ZZs+DMuvNkiojkkKy9w9SECRCJKORF\nRJKVUWf0GzfCqadqcJSICGTpgKnrr4dOnWD8+ANuJiKSE7JuwJQGR4mIHJyMaaO/885ghkoNjhIR\naZyMCPr58+HDDzU4SkTkYKR90McPjmrdOuxqREQyT9oH/eTJweyUGhwlInJw0rrXTUUF9OoFRUXq\nNy8iUldWDJh65BEYMEAhLyLSFGl7Rr9hA3z96/D665CfH2JhIiJpKuMHTP3wh9ClCzz0UIhFiYik\nsYweMLVqFfzlLxocJSKSCmnXRu8edKe87z7o0CHsakREMl/aBf38+bBuHdxwQ9iViIhkh6SC3swK\nzKzMzMrNbOwBtjvTzCrN7LLG7guwZ48GR4mIpFrCoDezPOAJYBBwCnClmZ3YwHYPAfMbu2+NyZOD\n2SmHDGnstyEiIg1J5oy+L/Cuu69z90pgOlDfONV/A2YA/ziIfQEYNw4efRQs4TVkERFJVjJB3xVY\nH/f649iyvczsK8Awd/9vwBqzb7wLL4QzzkiiIhERSVqqulc+Bhyw/T0ZnTsXUlgYPI9EIkQikaYe\nUkQka0SjUaLRaKP3Szhgysz6AYXuXhB7fTfg7j4+bpsPap4CnYHPgRsImnEOuG/cMZK6ObiIiARS\nOWCqBOhpZvnAJmA4cGX8Bu7eI+6NJwNz3H22mR2SaF8REWleCYPe3avM7BagmKBN/yl3LzWzMcFq\nn1R3l0T7pq58ERFJJG3nuhERkQPLimmKRUSk6RT0IiJZTkEvIpLlFPQiIllOQS8ikuUU9CIiWU5B\nLyKS5RT0IiJZTkEvIpLlFPQiIllOQS8ikuUU9CIiWU5BLyKS5RT0IiJZTkEvIpLlFPQiIllOQS8i\nkuUU9CIiWU5BLyKS5RT0IiJZTkEvIpLlFPQiIlkuqaA3swIzKzOzcjMbW8/6IWb2ppmtNLMVZtY/\nbt3a+HWpLF5ERBJLGPRmlgc8AQwCTgGuNLMT62y20N3/r7v3AX4IPBm3rhqIuHsfd++borpbRDQa\nDbuE/aim5KRjTZCedamm5KRjTclK5oy+L/Cuu69z90pgOjA0fgN33xH3sh1BuNewJN8n7aTjP6xq\nSk461gTpWZdqSk461pSsZAK4K7A+7vXHsWX7MLNhZlYKzAGui1vlwAIzKzGz0U0pVkREGi9lZ9ru\nPsvdTwKGAT+PW9Xf3U8HLgFuNrNzU/WeIiKSmLn7gTcw6wcUuntB7PXdgLv7+APs8z5wprtvrbN8\nHPCZu0+sZ58DFyIiIvtxd0u0TaskjlMC9DSzfGATMBy4Mn4DM/s/7v5+7PnpQBt332pmbYE8d68w\n
syOAi4H7D7ZYERFpvIRB7+5VZnYLUEzQ1POUu5ea2ZhgtU8CvmtmI4HdwBfA92O7Hw3MjJ2ttwKe\ndffi5vhGRESkfgmbbkREJLOF3u0x0WCsMJjZU2b2dzNbFXYtNcysm5m9aGZvmdlqM/txGtR0qJkt\njw2GWx27BpMWzCzPzF43s9lh1wLpOXDQzDqY2f+YWWns9+qsNKipV+xn9Hrs67/S5Hf9J2a2xsxW\nmdmzZtYmDWr699j/u8R54O6hPQg+aN4D8oHWwBvAiWHWFKvrXOA0YFXYtcTVdAxwWux5O+CdNPlZ\ntY19PQRYBvQNu6ZYPT8BpgGzw64lVs8HQMew66hT0xTg2tjzVkD7sGuqU18esBHoHnIdX4n9+7WJ\nvf4DMDLkmk4BVgGHxv7vFQM9Gto+7DP6hIOxwuDuS4FtYdcRz90/cfc3Ys8rgFLqGc/Q0rx2sNyh\nBGERelugmXUj6M77ZKJtW1BaDRw0s/bAee4+GcDd97j7pyGXVddFwPvuvj7hls3vEOAIM2sFtCX4\nAArTScByd9/l7lXAS8BlDW0c9i9eUoOxZF9mdjzBXxzLw61kbxPJSuATYIG7l4RdE/D/gDtJgw+d\nOOk2cPAEYIuZTY41k0wys8PDLqqOK4Dfh12Eu28EHgU+AjYA2919YbhVsQY4z8w6xno3XgJ0b2jj\nsINeGsnM2gEzgH+PndmHyt2rPZjjqBtwlpmdHGY9ZvYt4O+xv34s9kgH6TZwsBVwOvBfsbp2AHeH\nW1ItM2sNDAH+Jw1qOZKgpSGfoBmnnZn9IMya3L0MGA8sAOYCK4GqhrYPO+g3AMfFve4WWyb1iP3Z\nOAN4xt2Lwq4nXuzP/sVAQcil9AeGmNkHBGeDA8zs6ZBrwt03xb5uBmYSNFuG6WNgvbu/Gns9gyD4\n08Vg4LXYzytsFwEfuPvWWDPJn4BzQq4Jd5/s7me4ewTYDpQ3tG3YQb93MFbsKvZwIC16SZBeZ4M1\nfge87e7/P+xCAMyss5l1iD0/HPgmUBZmTe5+j7sf5+49CH6fXnT3kWHWZGZtY3+JETdwcE2YNbn7\n34H1ZtYrtmgg8HaIJdV1JWnQbBPzEdDPzA4zMyP4WZWGXBNm1iX29TjgUuC5hrZNZmRss/EGBmOF\nWROAmT0HRIBOZvYRMK7molWINfUHrgJWx9rEHbjH3V8IsaxjgamxqazzgD+4+9wQ60lX6Tpw8MfA\ns7Fmkg+Aa0OuBwg+GAnOom8IuxYAd19hZjMImkcqY18nhVsVAM+b2VEENd10oIvpGjAlIpLlwm66\nERGRZqae746mAAAALElEQVSgFxHJcgp6EZEsp6AXEclyCnoRkSynoBcRyXIKehGRLKegFxHJcv8L\npmvhyrL681sAAAAASUVORK5CYII=\n" | |
| }, | |
| "output_type": "display_data" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "scrolled": true, | |
| "trusted": true, | |
| "collapsed": false | |
| }, | |
| "cell_type": "code", | |
| "source": "df = pd.DataFrame(tsvd_data)\nplt.scatter(x=df[0], y = df[1])", | |
| "execution_count": 33, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "execution_count": 33, | |
| "data": { | |
| "text/plain": "<matplotlib.collections.PathCollection at 0x13d7767f0>" | |
| }, | |
| "output_type": "execute_result" | |
| }, | |
| { | |
| "metadata": {}, | |
| "data": { | |
| "text/plain": "<matplotlib.figure.Figure at 0x12b500f60>", | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAEACAYAAABGYoqtAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAH+hJREFUeJzt3X901fWd5/HnO4RIlB+FNoJCTLSChJ26VdcMM7ZjmEpq\nna6wxx2m7fyoLbprHdC2zow6PXuks91RPDvb6dRxOm2ZSmvkx9RpZTlMDWxN2+EUwygW2kRI1Rv5\nGW5RsSCYAO/94/ONXGIukNzvvd/vja/HOTn3+/3k/njne5LvO5/f5u6IiIgMpiLpAEREJL2UJERE\nJC8lCRERyUtJQkRE8lKSEBGRvJQkREQkr1iShJllzOxnZrbFzNqjsolm1mpm283sSTObkPP8e82s\ny8w6zaw5jhhERCR+cdUkTgBN7n6FuzdGZfcAG9z9MuCHwL0AZjYLWAA0AB8BHjYziykOERGJUVxJ\nwgZ5r3nA8uh4OTA/Or4RWOnux9w9A3QBjYiISOrElSQcWG9mm83slqhssrv3ALj7PuD8qHwqsDPn\ntbujMhERSZnKmN7nGnffa2Y1QKuZbSckjlxa/0NEpMzEkiTcfW/0mDWz7xOaj3rMbLK795jZFGB/\n9PTdQG3Oy6dFZW9jZkosIiLD4O6x9PUW3NxkZuea2djo+DygGdgGrAFujp72SeCJ6HgN8DEzqzKz\ni4FLgfZ87+/uqf+67777Eo9hJMSoOBVn2r/KJc44xVGTmAx8L/qvvxJocfdWM/t3YLWZfRroJoxo\nwt07zGw10AH0Abd73D+ViIjEouAk4e4vAe8fpPwV4Lo8r7kfuL/QzxYRkeLSjOsYNDU1JR3CGZVD\njKA446Y441UuccbJ0tzSY2ZqiRIRGSIzw9PScS0iIiOXkoSIiOSlJCEiInkpSYiISF5KEiIikpeS\nhIiI5KUkISIieSlJiIhIXkoSIiKSl5JEArLZLJs3byabzSYdiojIaSlJlNiKFauoq5vJ3Lm3UVc3\nkxUrViUdkohIXlq7qYSy2Sx1dTM5cuQp4HJgK9XVc+jufp6ampqkwxOREUJrN5WpTCZDVVU9IUEA\nXM7o0XVkMpnkghIROQ0liRKqr6+ntzcDbI1KttLX1019fX1yQYmInIaSRAnV1NSwbNnDVFfPYfz4\nK6munsOyZQ+rqUlEUkt9EgnIZrNkMhnq6+uVIEQkdnH2SShJiIiMMOq4FhGRklCSKDFNpBORcqIk\nUUL9E+nmzLlVE+lEpCyoT6JEstks06ZNp7f3x/RPpKuq+h127epS57WIxEp9EmVoy5Yt9PbWkDuR\nrrf3PWzZsiXJsERETktJoqT2kDuRDvYmGIuIyJlVJh3AO0VtbS1wDGgC6oEMcCwqFxFJJ9UkSuTQ\noUNUV08FHDgMOGPGXMihQ4cSjkxEJD/VJEokrM90EPgecB5wGLObtG6TiKRabDUJM6sws2fNbE10\nPtHMWs1su5k9aWYTcp57r5l1mVmnmTXHFUOanVy36SbGj//vVFffpHWbRCT1YhsCa2afA64Cxrv7\njWa2FDjg7g+a2d3ARHe/x8xmAS3A1cA0YAMwfbCxriNpCGw/rdskIsWWuiGwZjYNuAH4Zk7xPGB5\ndLwcmB8d3wisdPdj7p4BuoDGOOIoBzU1NVx99dVKECJSFuJqbvoy8OeEXtl+k929B8Dd9wHnR+VT\ngZ05z9sdlYmISMoUnCTM7PeAHnd/Djhd9WZktRsNkdZsEpFyFMfopmuAG83sBqAaGGdm3wH2mdlk\nd+8xsynA/uj5u4HcyQHTorJBLVmy5K3jpqYmmpqaYgi5tFasWMXChbdTVRV2plu27GE+/vE/SDos\nERkh2traaGtrK8p7x7p2k5ldC9wVdVw/SOi4Xpqn4/o3Cc1M6xnBHdfZbJa6upkcOfIU/Ws2VVfP\nobv7efVLiEhRpK7jOo8HgLlmth34UHSOu3
cAq4EOYB1we9lngtPIZDJUVdWTu2bT6NF1ZDKZ5IIS\nETlLWgW2yFSTEJFSK5eahJA7iW4O48dfSXX1HE2iE5GyoZpEiWgSnYiUSpw1CSUJEZERRs1NIiJS\nEkoSJaLJdCJSjpQkSmDFilXU1c1k7tzbqKubyYoVq5IOSUTkrKhPosg0BFZESk19EmVEk+lEpJwp\nSRRZfX1Yrwm2RiVb6evr1o50IlIWlCSKrKamhoUL/wiYDcwAZrNw4R+pqUlEyoL6JIrsZJ/E4/Tv\nbV1dfZP6JESkaOLsk4hjqXA5jf4+iSNHmt4q6++TUJIQkbRTc1ORqU9CRMqZkkSRaYE/ESln6pMo\nES3wJyKlogX+REQkL02mExGRklCSKAEt7ici5UpJosi0uJ+IlDP1SRSRFvcTkSSoT6JMaHE/ESl3\nShJFpIl0IlLulCSKSBPpRKTcqU+iBDSRTkRKSZPpREQkL3Vci4hISShJiIhIXkoSIiKSl5KEiIjk\nVXCSMLNzzOxpM9tiZtvM7L6ofKKZtZrZdjN70swm5LzmXjPrMrNOM2suNAYRESmOWEY3mdm57v6G\nmY0CNgJ3ADcBB9z9QTO7G5jo7veY2SygBbgamAZsAKYPNoxJo5tERIYudaOb3P2N6PAcwr7ZDswD\nlkfly4H50fGNwEp3P+buGaALaIwjDhERiVcsScLMKsxsC7APWO/um4HJ7t4D4O77gPOjp08Fdua8\nfHdUJiIiKVMZx5u4+wngCjMbD3zPzP4DoTZxytOG895Llix567ipqYmmpqZhRikiMjK1tbXR1tZW\nlPeOfca1mf0P4A3gFqDJ3XvMbArwlLs3mNk9gLv70uj5PwDuc/enB3mvsu+T6OzspL29ncbGRhoa\nGpIOR0TeAVLVJ2Fm7+kfuWRm1cBcoBNYA9wcPe2TwBPR8RrgY2ZWZWYXA5cC7YXGkUaLF3+WWbOu\n4uab/5pZs65i8eI7kw5JRGRICq5JmNn7CB3TFdHXKnf/X2Y2CVgN1ALdwAJ3fy16zb3AQqAPuNPd\nW/O8d9nWJDo7O5k16ypgE/0bDsFsOjqeUY1CRIoqzppEwX0S7r4NuHKQ8leA6/K85n7g/kI/O83a\n29sJ+fHkhkMwjfb2diUJESkbmnFdJI2NjYRBXCc3HIJdUbmISHlQkiiShoYGFi26FZgNzABms2jR\nrapFiEhZ0X4SRabRTSJSatp0SERE8krVEFg5vZaWFubNm0dLS0vSoYiIDJlqEkVUW3sJu3btI6xj\nuIva2sm8/PJLSYclIiOcahJloKWlJUoQm4AdwCZ27uxRjUJEyoqSRJGsXr2aUIPInScxNSoXESkP\nShJF8uKL3cAuTp0nsZsFCxYkF5SIyBCpT6IINm7cyAc+MBdYAiwl1Ci6qKmZwP79exONTURGPvVJ\npFxrayshMfwF8DzwTWAKn/nMf0s0LhGRoVKSKILm5mZONjXVEDbs2xeVi4iUDzU3FcmHP3wDra1t\nhE33dtPc3MSTT65LOCoReSdQc1MZmDFjBmEzvjcBZ8aM6QlHJCIydKpJFMHJvSTWAecBh4EbtJeE\niJSEahIpF/aSeBdwE3Bb9DghKhcRKR9KEkVQWVkJvAY8BTwTPR6MykVEyoeSRBE8/vj3CB3WFwCb\no8ep7NixI9G4RESGSv/axiybzbJu3XpCh/VlwMXAS8BhDYEVkbKjmkTMMpkMFRXnEy5tG6G5qQ0Y\nRSaTSS4wEZFhUJKIWX19PW+++TJhxvWpzU1a3E9Eyo2am2JWU1PDmDHn8MYb3cB04BIgAxxmwYIl\nSYYmIjJkqknErKWlhTfeeAMYBfwYeJb+5iYRkXKjJBGzr33ta4QK2gy0l4SIlDsliZjV1NQAFwI7\n0V4SIlLutCxHzLSXhIgkLc5lOZQkiuDkCrBTgD38xm/MZNu25xKOSkTeKZQkysDGjRtpbW2lubmZ\na665Ju
lwROQdJFVJwsymAd8GJgMngG+4+9+Z2URgFVBHGAO6wN0PRq+5F/g0cAy4091b87x32SYJ\nEZGkpC1JTAGmuPtzZjaWMMV4HvAp4IC7P2hmdwMT3f0eM5sFtABXExrsNwDTB8sGShIiIkOXqqXC\n3X2fuz8XHR8COgk3/3nA8uhpy4H50fGNwEp3P+buGaALaCw0DhERiV+sQ2DNrB54P7AJmOzuPRAS\nCXB+9LSphPGh/XZHZSIikjKxLcsRNTV9l9DHcMjMBrYTDavdaMmSJW8dNzU10dTUNNwQRURGpLa2\nNtra2ory3rGMbjKzSmAt8K/u/pWorBNocveeqN/iKXdvMLN7AHf3pdHzfgDc5+5PD/K+6pMQERmi\nVPVJRP4J6OhPEJE1wM3R8SeBJ3LKP2ZmVWZ2MXApoH09RURSKI7RTdcQVrLbRmhScuAvCTf+1UAt\n0E0YAvta9Jp7gYVAHyN0CGxnZyft7e00NjbS0NCQdDgi8g6SqiGwxVSuSWLx4s/y0ENfJ+THnSxa\ndCtf/epXzvQyEZFYKEmkWGdnJ7NmXUUY4HU5YXG/2XR0PKMahYiURBr7JCSyYcMGwjSRU5cJD+Ui\nIuVFSSJmkydPBnYxcJnwUC4iUl7U3BSzbDbLlCkXceKEEWoUu6iocPbteznaa0JEpLjU3JRiNTU1\n3HbbLcCbVFYe4JxzRvPoo48oQYhIWVJNIma1tZewa9c++msRF1zwHvbseTnpsETkHUQ1iZRqaWmJ\nEsQmYAewib17f0VLS0vCkYmIDI+SRIxWr17NYCObQrmISPlRkojRtGkXMdjIpgULFiQXlIhIAdQn\nEZNsNktd3UyOHKkEfk1Y/Xy3+iREpOTUJ5FCmUyGY8dGA68DY4GXgAqeeOLxZAMTESlAbPtJvNP1\n9vbS1/cacC5hzaZjwGF6e3uTDUxEpACqScTkRz/6ETAK+Bfga9FjZVQuIlKeVJOI1XjgJqAeyADj\nkgxGRKRgqknE5PLLLwcOAk8Bz0SPr0flIiLlSUkiJgcOHCCMaDp1jkQoFxEpT0oSMXnppW5gNwPn\nSFx66aXJBSUiUiAliRhks1m+9KUHgOPA7wCXRY/HaW/X9t0iUr6UJGKQyWQ4fvwE8G7CCKfq6HES\n69atSzQ2EZFCKEnEIMyFeBN4jdBh/Vz0eJATJ04kGZqISEGUJGLwy1/+kjD8dSpwAbA5epzKRRdd\nlGRoIiIF0TyJGDQ2NgJHgZ2E/oiLCctyHOaWW25JMjQRkYKoJhGDhoYGrr32A4TL2UaYJ9GGWSUz\nZsxINDYRkUIoScQgm83y059uBmaQO0/C/QK2bNmSYGQiIoVRkojBP/7jN+jtfTehuSl3nsQeXn5Z\ny4SLSPlSn0SBwhyJpUAf8GfABwm7070EjOPIkSNJhiciUhDVJAqUyWRwH0dYGvx/A5MJCeIG4Ndc\nd911SYYnIlIQ1SQKNHbsWHp7DxA2Gmoj9ElsBWbz8Y/fRENDQ5LhiYgUJJaahJktM7MeM9uaUzbR\nzFrNbLuZPWlmE3K+d6+ZdZlZp5k1xxFDUnbu3Am8hzDs9eQciVGjavnc5+5INDYRkULF1dz0LeDD\nA8ruATa4+2XAD4F7AcxsFrAAaAA+AjxsZrHsxZqcA8AOYDrwx8B0jh/PMHbs2GTDEhEpUCxJwt3/\nDXh1QPE8YHl0vByYHx3fCKx092PungG6gMY44kjCFVdcQUWFEzquDRgTPcK2bdsSjExEpHDF7Lg+\n3917ANx9H3B+VD6VMFa03+6orCzV1NTw+78/n3Apf0RYtylsZRqW6xARKV+l7Lj24bxoyZIlbx03\nNTXR1NQUUzjxaWy8mlWrNnPqhkMXMmbMmASjEpF3ira2Ntra2ory3sVMEj1mNtnde8xsCrA/Kt8N\n1OY8b1pUNqjcJJFWP/7xj4G9hFFN/aOb9mpXOhEpiYH/QH/xi1+M7b3j
bG4y+hvjgzXAzdHxJ4En\ncso/ZmZVZnYxcClQtjvzZLNZnnhiHWGeRBNwZfR4TLOtRaTsxVKTMLPHCHfGd5vZy8B9wAPAP5vZ\np4Fuwogm3L3DzFYDHYTe3tvdfVhNUWmQyWQ4mR+PEipMx4BJ7Nu3L8HIREQKF0uScPdP5PnWoNON\n3f1+4P44PjtpYcOhUcAmwjyJ9cDtwGtcfPHFSYYmIlIwLctRoDCCaRqhL6IG+ARhcl2fluQQkbKn\nJFGgsOHQLgau/gqjmDNnTmJxiYjEQWs3FaihoYGJE8fy6quzCdM9dgNv8thjj1FTU5NwdCIihVGS\nKNDatWt59dVDwDrgPOAwcAPjxp2XbGAiIjGwNA8sMrPUD3xqbm5m/foXgZ8CGaAe+C3e975z2bp1\n6+leKiJSFGaGu8eyJp5qEgXauXM3oU9iOjAF2AccYdu2MIdCTU4iUs7UcV2AbDbL9u0vRmcVhOam\nCsKQ2Artby0iZU81iQKEXelGA8eBfwIOAhOAP4weRUTKm5JEAcJ+EUeAcYT5EbWEBW6rgFe54oor\nEoxORKRwam4qQNiVbhRhOY5NwPbosZcPfeiDZ9Uf0dnZyfLly+ns7CxqrCIiw6EkUYCwgN8Y4EJO\nXSZ8Kq+8cuYVYBcv/iyzZl3FzTf/NbNmXcXixXcWL1gRkWFQkijAz3/+c+BN4BVOnXG9m6NHj572\ntZ2dnTz00NfJrYE89NA3VKMQkVRRkihAd3c3YZb1PwBzCMuE/xbQy/Tp00/72vb2dkIfRm4NZFpU\nLiKSDkoSBXjhhRcI6zQ1AM8DfwacAN5FRUX+S7tx40a+//3vAy8A349KtwK7orWgRETSQaObCtDR\n0UEY2dRIGPJ6kNBH8eu8zU3NzTewfn0boR/jHOAPgHOBN1m06FYaGhpKELmIyNlRTaIAx48fJ6zV\nVAGMjx4PA5VMmPD2eRIbN26MEsQm4JeEpTzOA47y6KPf4Ktf/UqJIhcROTtKEgWrJNz0u6LH0UAv\n48aNe9szW1tbObn3BNFjPfAeduzYUYpgRUSGRElimLLZbHQ0cPjrhYDxzDPPvO35tbW1vH3viQzw\nK5qbm4sdsojIkKlPYphO7m29l3Czvzx63Av08frrr7/13BUrVrFw4e1UVdVjBu6zCclkL3CM5uYP\ncc0115T6RxAROSMliWHq6ekBHDgGXAtcBLwcnUNVVRUQahALF97OkSNPceRISCTnnPM7zJ//n6iu\nruaWW25RghCR1FKSGKbPf/7z0ZERluV4NXoMS7jv3/8rINQ4qqrqowQBcDnnnHMJd911F1dffXVp\ngxYRGSL1SQxTV1cX4fIZsAx4Bnia/kt64MAh1q5dy9ixYzl69EVy+yH6+rqpr68vfdAiIkOkJFGQ\nsG8E/A0wE+gkzMAeDUzlgQeWctVVH6CiYiIwm9GjG6io+G3e+95pPP3008mFLSJylrR96TCZGWES\n3E852WndRFg6/ChhuXCA/wnUAX9C6MOoJHRa7+F977uMrVu1MZGIxCvO7UtVkyjIBZw6/HUS0Ee4\nrBXAYuB+4JbofCwnJ9JtYtu27axdu7bUQYuInDUliYLsAdqAzdHjHuASTo4HWENYJdaAGuBiBi4p\nHtZwEhFJJ41uKkgfcANhFvUuoJeQKC4AFgK3EeZCzAb2A4c4dU7FbubPn1/6sEVEzpL6JIYp9ElU\nA+sI6y8dJiSMBkIHdj3QAzwMfAF4iZCTKwlJRH0SIlIccfZJJJYkzOx64G8JTV7L3H3pIM9JeZK4\ngNCcVE9YXqOKUGM4D/h/hFVem+jvyDY7wmWXXUJtbS133HEHH/3oRxOIXERGujiTRCLNTWZWATwE\nfIjQPrPZzJ5w9+eTiGf4XgMeIywRPgH4BGE/iV7gOeBWQmd2N3Ccn/zkh5pdLSJlJak+iUagy927\nAcxsJTCPsHNPGRlDSAy1wE5CTeIo
8MfAPcB0Qg48waJFtytBiEjZSSpJTCXcVfvtIiSOMnOUMKS1\nvyN6NmEk0zFCDeJ6LrmklrVr12gzIREpSxrdVJCBe0NMJYxmugjYDbzJCy90JRSbiEjhkkoSuwl3\n0n7TorK3WbJkyVvHTU1NNDU1FTOuIerfG+LkkNYwLHYp8CZp7XQXkZGlra2Ntra2orx3IqObzGwU\nsJ3Qcb0XaAc+7u6dA56X8tFNRuiXmEp/zQFOcO654zh8+PXTvVxEpGjKflkOdz8OLAJagV8AKwcm\niLQ7mbz6CDWKkCAeffRRJQgRGTE0ma5AoUYRpD1WEXlnKPt5EiOJEoOIjGRa4E9ERPJSkhARkbyU\nJEREJC8lCRERyUtJQkRE8lKSEBGRvJQkREQkLyUJERHJS0lCRETyUpIQEZG8lCRERIBsNsvmzZvJ\nZrNJh5IqShIikoiBN+VCz4f6eZ2dnSxfvpzOzk5WrFhFXd1M5s69jbq6maxYseqM75f7+uHEUzbc\nPbVfITwRGWkee2ylV1dP8gkTrvTq6km+aNGdA87vGNL5Y4+t9I6ODn/kkUe8o6PD3d3379/v7e3t\nvn///rd93ty5H3GodpjhUO2jRp3r8DMHd/iZV1dP8v379+eNf9GiO095fXPzR94WT5Kie2c89+G4\n3qgYX0oSIiPP/v37vbp6Us5N+anohnvyJh3Onzrr88rKcQ5jBr1pjxnzLq+qmnCGzzvXYX907j5+\n/BXe3t4+aPwdHR1njPdMSabY4kwSam4SkZLKZDJUVdVzcn/484BaTt0vflpUfnbnx46dDywjbHi5\nidbWNo4ceZyDB5/h6NG/p7e3ZsDnDdyf/gJgfXS+lb6+burr6weNv729fZB4p54Sz+jRdWQymbO6\nHmmnJCEiJVVfX09vb4awLzzAYWBnzvlWwm6Ph4dwvheYG50PvGnPBfYM+LxdnPp5exgz5k8ZP/5K\nqqvnsGzZw9TU1Awaf2Nj4yDx7j4lntMlmbITV5WkGF+ouUlkROrvIxg//opT+hiGcz5mzESHqtM2\n/4wePfaU1zc39/dJTHeo9kWL7jilD+NMFi2645TX9zdv9b//SOqT0PalIpKIbDZLJpOhvr6empqa\ngs7/6q++xEMPfYPQjLSL5uYmfvKTpxk9uo6+vm6WLXuY66773VNe39nZSXt7O42NjTQ0NAw5/oGv\nHxhfkuLcvlRJQkRGhDTftEtNSUJERPKKM0mo41pERPJSkhARkbyUJEREJC8lCRERyUtJQkRE8lKS\nEBGRvJQkREQkr4KShJn9VzP7uZkdN7MrB3zvXjPrMrNOM2vOKb/SzLaa2Q4z+9tCPl9ERIqr0JrE\nNuC/AD/KLTSzBmAB0AB8BHjYzPondvwDsNDdZwAzzOzDBcaQuLa2tqRDOKNyiBEUZ9wUZ7zKJc44\nFZQk3H27u3cBA2f2zQNWuvsxd88AXUCjmU0Bxrn75uh53wbmFxJDGpTDL045xAiKM26KM17lEmec\nitUnMZWwlm6/3VHZVMIavf12RWUiIpJClWd6gpmtBybnFgEOfMHd/2+xAhMRkeTFssCfmT0F3OXu\nz0bn9xDWM18anf8AuA/oBp5y94ao/GPAte7+mTzvq9X9RESGIa4F/s5YkxiC3IDWAC1m9mVCc9Kl\nQLu7u5kdNLNGYDPwJ8Df5XvDuH5IEREZnkKHwM43s53AbGCtmf0rgLt3AKuBDmAdcHvOmt9/StiM\ndgfQ5e4/KCQGEREpnlTvJyEiIslKxYxrM3swmnT3nJk9bmbjc76X2kl5Zna9mT0fxXB3EjHkxDLN\nzH5oZr8ws21mdkdUPtHMWs1su5k9aWYTcl4z6LUtQawVZvasma1JcYwTzOyfo8/9hZn9Zkrj/Fw0\noXWrmbWYWVUa4jSzZWbWY2Zbc8qGHFex/87zxJm6+9FgceZ87y4zO2Fmk4oSZ1ybZRfyBVwHVETH\n
DwD3R8ezgC2EvpN64JecrP08DVwdHa8DPlzimCuieOqA0cBzwMwEr+EU4P3R8VhgOzATWAr8RVR+\nN/DAma5tCWL9HPAosCY6T2OMjwCfio4rgQlpixO4EHgRqIrOVwGfTEOcwAeA9wNbc8qGHFex/87z\nxJm6+9FgcUbl04AfAC8Bk6KyhjjjTEVNwt03uPuJ6HQT4QcHuJH0TsprJPSpdLt7H7CSMIkwEe6+\nz92fi44PAZ2E6zgPWB49bTknr9Og17bYcZrZNOAG4Js5xWmLcTzwQXf/FkD0+QfTFmdkFHCemVUC\n1YQ5SYnH6e7/Brw6oHhIcZXi73ywONN4P8pzPQG+DPz5gLJYJzOnIkkM8GlChoN0T8obGFtqJgaa\nWT3hv45NwGR374GQSIDzo6flu7bF1v9LndsZlrYYLwZ+ZWbfiprFvm5m56YtTnffA/wN8HL0mQfd\nfUPa4sxx/hDjSsPfeWrvR2Z2I7DT3bcN+FascZYsSZjZ+qgtrP9rW/T4n3Oe8wWgz91XlCqukcbM\nxgLfBe6MahQDRyYkNlLBzH4P6IlqPKcb3pz0aIpK4Erg7939SuAwcA8pupYAZvYuwn+NdYSmp/PM\n7A8HiSvp65lPWuMC0n0/MrNq4C8J88+KKs55Eqfl7nNP930zu5nQDPG7OcW7gdqc82lRWb7yUtoN\nXJRwDKeImhy+C3zH3Z+IinvMbLK790TVzf1ReRLX8BrgRjO7gdA0Ms7MvgPsS1GMEP7D2unu/x6d\nP05IEmm6lhDazl9091cAzOx7wG+nMM5+Q40rsXjL4H70XkJ/w8/MzKLPfNbCHLR896bhxRln50oB\nnTLXA78A3j2gvL+jqIrQBJDbAbOJ0J5qhOrg9SWOeRQnO66rCB3XDQlfx28D/2dA2VLg7uh4sM7C\nt13bEsV6LSc7rh9MW4yElY1nRMf3RdcxVdcy+v3fBoyJ/g4eIcxDSkWchJvYtkJ+F0vxdz5InKm8\nHw2Mc8D3XgImFiPOov+xneUP30VYsuPZ6OvhnO/dG/2QnUBzTvlV0R9IF/CVhOK+njCKqAu4J+Fr\neA1wnJCstkTX8XpgErAhirMVeNeZrm2J4s1NEqmLEfiPhFUBngP+hTC6KY1x3hd95lZCZ/DoNMQJ\nPAbsAd4k9Jl8Cpg41LiK/XeeJ87U3Y8Gi3PA918kGt0Ud5yaTCciInmlcXSTiIikhJKEiIjkpSQh\nIiJ5KUmIiEheShIiIpKXkoSIiOSlJCEiInkpSYiISF7/H8AbScQ7L378AAAAAElFTkSuQmCC\n" | |
| }, | |
| "output_type": "display_data" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "collapsed": true | |
| }, | |
| "cell_type": "code", | |
| "source": "#kmeans cluster\n# groupby of the original dataframe\n# brandonrose.org/clustering\n", | |
| "execution_count": null, | |
| "outputs": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3", | |
| "language": "python" | |
| }, | |
| "language_info": { | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.2", | |
| "codemirror_mode": { | |
| "version": 3, | |
| "name": "ipython" | |
| }, | |
| "mimetype": "text/x-python", | |
| "file_extension": ".py", | |
| "nbconvert_exporter": "python", | |
| "name": "python" | |
| }, | |
| "gist": { | |
| "id": "", | |
| "data": { | |
| "description": "projects/dnc_leaks/DNC-Modeling.ipynb", | |
| "public": true | |
| } | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment