Created
January 21, 2021 15:54
-
-
Save TC-Alex/7a29e5936be8f0bd69de82bbbc828d02 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| {"cells":[{"metadata":{"trusted":false},"cell_type":"code","source":"import pandas as pd\nimport numpy as np\nimport csv\n\ndf = pd.read_csv('claim_report_tunecore_C.csv',usecols=['claim_status','views','matching_duration','claim_created_date','video_duration_sec'])","execution_count":1,"outputs":[]},{"metadata":{"trusted":false},"cell_type":"code","source":"df.head()","execution_count":2,"outputs":[{"data":{"text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>claim_status</th>\n <th>views</th>\n <th>matching_duration</th>\n <th>claim_created_date</th>\n <th>video_duration_sec</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>ACTIVE</td>\n <td>35</td>\n <td>0</td>\n <td>2020/09/08</td>\n <td>148</td>\n </tr>\n <tr>\n <th>1</th>\n <td>ACTIVE</td>\n <td>0</td>\n <td>0</td>\n <td>2020/06/27</td>\n <td>109</td>\n </tr>\n <tr>\n <th>2</th>\n <td>ACTIVE</td>\n <td>743</td>\n <td>66</td>\n <td>2020/08/17</td>\n <td>215</td>\n </tr>\n <tr>\n <th>3</th>\n <td>ACTIVE</td>\n <td>221</td>\n <td>94</td>\n <td>2020/08/16</td>\n <td>215</td>\n </tr>\n <tr>\n <th>4</th>\n <td>ACTIVE</td>\n <td>3</td>\n <td>0</td>\n <td>2020/03/21</td>\n <td>185</td>\n </tr>\n </tbody>\n</table>\n</div>","text/plain":" claim_status views matching_duration claim_created_date \\\n0 ACTIVE 35 0 2020/09/08 \n1 ACTIVE 0 0 2020/06/27 \n2 ACTIVE 743 66 2020/08/17 \n3 ACTIVE 221 94 2020/08/16 \n4 ACTIVE 3 0 2020/03/21 \n\n video_duration_sec \n0 148 \n1 109 \n2 215 \n3 215 \n4 185 "},"execution_count":2,"metadata":{},"output_type":"execute_result"}]},{"metadata":{"trusted":false},"cell_type":"code","source":"set(df['claim_status'])","execution_count":3,"outputs":[{"data":{"text/plain":"{'ACTIVE'}"},"execution_count":3,"metadata":{},"output_type":"execute_result"}]},{"metadata":{},"cell_type":"markdown","source":"^ Good to know. Don't have to filter out any of these by claim status."},{"metadata":{"trusted":false},"cell_type":"code","source":"def bucket(seconds):\n if seconds < 1:\n return 'N/A'\n if seconds >= 1 and seconds < 5:\n return '1-5'\n if seconds >= 5 and seconds < 10:\n return '5-10'\n if seconds >= 10 and seconds < 20:\n return '10-20'\n if seconds >= 20 and seconds < 30:\n return '20-30'\n if seconds >= 30 and seconds < 40:\n return '30-40'\n if seconds >= 40 and seconds < 50:\n return '40-50'\n if seconds >= 50 and seconds < 60:\n return '50-60'\n if seconds >= 60 and seconds < 90:\n return '60-90'\n if seconds >= 90 and seconds < 120:\n return '90-120'\n else:\n return '120+'","execution_count":13,"outputs":[]},{"metadata":{"trusted":false},"cell_type":"code","source":"df['matched_percentage'] = np.ceil(100*df['matching_duration']/df['video_duration_sec'])\n#df['bucket'] = df['matching_duration'].map(lambda x: bucket(x))\n\n#df['qualified_claim'] = df['claimed_percentage'].map(lambda x: 1 if x >= 0.8 else 0)\n#df[df['claim_created_date'] >= '2020'].groupby('bucket').agg({'views':sum})","execution_count":8,"outputs":[]},{"metadata":{"trusted":false},"cell_type":"code","source":"df.head()","execution_count":9,"outputs":[{"data":{"text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>claim_status</th>\n <th>views</th>\n <th>matching_duration</th>\n <th>claim_created_date</th>\n <th>video_duration_sec</th>\n <th>matched_percentage</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>ACTIVE</td>\n <td>35</td>\n <td>0</td>\n <td>2020/09/08</td>\n <td>148</td>\n <td>0.0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>ACTIVE</td>\n <td>0</td>\n <td>0</td>\n <td>2020/06/27</td>\n <td>109</td>\n <td>0.0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>ACTIVE</td>\n <td>743</td>\n <td>66</td>\n <td>2020/08/17</td>\n <td>215</td>\n <td>31.0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>ACTIVE</td>\n <td>221</td>\n <td>94</td>\n <td>2020/08/16</td>\n <td>215</td>\n <td>44.0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>ACTIVE</td>\n <td>3</td>\n <td>0</td>\n <td>2020/03/21</td>\n <td>185</td>\n <td>0.0</td>\n </tr>\n </tbody>\n</table>\n</div>","text/plain":" claim_status views matching_duration claim_created_date \\\n0 ACTIVE 35 0 2020/09/08 \n1 ACTIVE 0 0 2020/06/27 \n2 ACTIVE 743 66 2020/08/17 \n3 ACTIVE 221 94 2020/08/16 \n4 ACTIVE 3 0 2020/03/21 \n\n video_duration_sec matched_percentage \n0 148 0.0 \n1 109 0.0 \n2 215 31.0 \n3 215 44.0 \n4 185 0.0 "},"execution_count":9,"metadata":{},"output_type":"execute_result"}]},{"metadata":{"trusted":false},"cell_type":"code","source":"df.boxplot(column=['matched_percentage'])","execution_count":10,"outputs":[{"data":{"text/plain":"<matplotlib.axes._subplots.AxesSubplot at 0x1fdad977710>"},"execution_count":10,"metadata":{},"output_type":"execute_result"},{"data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAYMAAAD5CAYAAADFqlkBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAaY0lEQVR4nO3df3TV9Z3n8ecrCRB+lBKURn44pKeye0Kj29Ucq5TTk0DHKrOLzG45wulWWnPgDLaZ9tgdtM3sevojjnW2O1PbqZZjrLha0NZxC6wWWZpMS/1RofUHkrayVUoKp+5IgiUMAZL3/nE/YS8QyA1ecnPl9Tjnnvv9vr+f7/e+r1zzyvdH7lcRgZmZnd9KCt2AmZkVnsPAzMwcBmZm5jAwMzMcBmZmBpQVuoGzdeGFF0ZVVVWh2zA7RXd3N+PHjy90G2an2L59+z9HxJSBlhVtGFRVVbFt27ZCt2F2ira2Nurq6grdhtkpJO0+3TIfJjIzM4eBmZk5DMzMDIeBmZnhMDAzMxwGZnmzdu1aampqmD9/PjU1Naxdu7bQLZnlrGgvLTUbSdauXUtTUxMtLS309vZSWlpKQ0MDAEuXLi1wd2aD856BWR40NzfT0tJCfX09ZWVl1NfX09LSQnNzc6FbM8uJw8AsD9rb25k7d+4Jtblz59Le3l6gjsyGxmFglgfV1dVs3br1hNrWrVuprq4uUEdmQ+MwMMuDpqYmGhoaaG1t5dixY7S2ttLQ0EBTU1OhWzPLiU8gm+VB/0nixsZG2tvbqa6uprm52SePrWioWO+BXFtbG/6iOhuJ/EV1NlJJ2h4RtQMt82EiMzPLLQwkvS7pZUkvSNqWapMlbZb0anquSHVJulvSLkkvSbo8azvL0vhXJS3Lql+Rtr8rrat8v1EzMzu9oewZ1EfEB7J2MW4DtkTELGBLmge4DpiVHiuAeyATHsDtwAeBK4Hb+wMkjVmRtd61Z/2OzMxsyN7OYaLrgTVpeg2wKKv+YGQ8C0ySNBX4KLA5IvZHRCewGbg2LZsYEc9E5gTGg1nbMjOzYZDr1UQBPCUpgO9ExGqgMiL2AUTEPknvSWOnA3uy1u1ItTPVOwaon0LSCjJ7EFRWVtLW1pZj+2bD5+DBg/5sWtHJNQw+FBF70w/8zZJ+dYaxAx3vj7Oon1rMhNBqyFxN5Cs2bCTy1URWjHI6TBQRe9PzG8DjZI75/yEd4iE9v5GGdwAXZ60+A9g7SH3GAHUzMxsmg4aBpPGS3tU/DVwD7ADWA/1XBC0Dfpim1wM3pquKrgIOpMNJm4BrJFWkE8fXAJvSsj9KuipdRXRj1rbMzGwY5HKYqBJ4PF3tWQZ8LyJ+JOl54FFJDcDvgMVp/BPAAmAXcAj4FEBE7Jf0FeD5NO7LEbE/Ta8EHgDGAk+mh5mZDZNBwyAifgv8mwHqbwLzB6gH8OnTbOt+4P4B6tuAmhz6NTOzc8B/gWxmZg4DMzNzGJiZGQ4DMzPDYWBmZjgMzMwMh4GZmeEwMDMzHAZmZobDwMzMcBiYmRkOAzMzw2FgZmY4DMzMDIeBmZnhMDAzMxwGZmaGw8DMzHAYmJkZDgMzM8NhYGZmOAzMzAyHgZmZ4TAwMzMcBmZmhsPAzMxwGJiZGQ4DMzPDYWBmZgwhDCSVSvqlpI1p/r2SnpP0qqRHJI1O9TFpfldaXpW1jS+k+q8lfTSrfm2q7ZJ0W/7enpmZ5WIoewafBdqz5r8G/F1EzAI6gYZUbwA6I+IS4O/SOCTNBpYA7weuBb6dAqYU+AfgOmA2sDSNNTOzYZJTGEiaAfwZcF+aFzAP+EEasgZYlKavT/Ok5fPT+OuBdRHRExGvAbuAK9NjV0T8NiKOAOvSWDMzGyZlOY77e2AV8K40fwHQFRHH0nwHMD1NTwf2AETEMUkH0vjpwLNZ28xeZ89J9Q8O1ISkFcAKgMrKStra2nJs32z4HDx40J9NKzqDhoGkfwe8ERHbJdX1lwcYGoMsO119oL2TGKBGRKwGVgPU1tZGXV3dQMPMCqqtrQ1/Nq3Y5LJn8CFgoaQFQDkwkcyewiRJZWnvYAawN43vAC4GOiSVAe8G9mfV+2Wvc7q6mZkNg0HPGUTEFyJiRkRUkTkB/OOI+DjQCnwsDVsG/DBNr0/zpOU/johI9SXpaqP3ArOAnwPPA7PS1Umj02usz8u7MzOznOR6zmAgtwLrJH0V+CXQkuotwP+QtIvMHsESgIh4RdKjwE7gGPDpiOgFkPQZYBNQCtwfEa+8jb7MzGyIhhQGEdEGtKXp35K5EujkMYeBxadZvxloHqD+BPDEUHoxM7P88V8gm5mZw8DMzBwGZnmzdu1aampqmD9/PjU1Naxdu7bQLZnl7O2cQDazZO3atTQ1NdHS0kJvby+lpaU0NGS+oWXp0qUF7s5scN4zMMuD5uZmWlpaqK+vp6ysjPr6elpaWmhuPuV6CbMRyWFglgft7e3MnTv3hNrcuXNpb28/zRpmI4vDwCwPqqur2bp16wm1rVu3Ul1dXaCOzIbG5wzM8qCpqYkbbriB8ePHs3v3bmbOnEl3dzff+MY3Ct2aWU68Z2CWZ5lvbDcrLg4Dszxobm7mkUce4bXXXmPLli289tprPPLIIz6BbEXDYWCWBz6BbMXOYWCWBz6BbMXOYWCWB01NTTQ0NNDa2sqxY8dobW2loaGBpqamQrdmlhNfTWSWB/1/ZdzY2Eh7ezvV1dU0Nzf7r4+taChz35niU1tbG9u2bSt0G2an8G0vbaSStD0iagda5sNEZmbmMDAzM4eBmZnhMDAzMxwGZmaGw8DMzHAYmJkZDgMzM8NhYGZmOAzMzAyHgZmZ4TAwMzMcBmZ509jYSHl5OfX19ZSXl9PY2FjolsxyNmgYSCqX9HNJL0p6RdKXUv29kp6T9KqkRySNTvUxaX5XWl6Vta0vpPqvJX00q35tqu2SdFv+36bZudXY2Mi9997LHXfcwZNPPskdd9zBvffe60CwojHoV1grc3fv8RFxUNIoYCvwWeAW4B8jYp2ke4EXI+IeSTcDl0XEX0haAvx5RNwgaTawFrgSmAb8b+BfpZf5DfCnQAfwPLA0InaeqS9/hbWNJOXl5dTW1rJt2zZ6enoYM2bM8fnDhw8Xuj0z4G1+hXVkHEyzo9IjgHnAD1J9DbAoTV+f5knL56dAuR5YFxE9EfEasItMMFwJ7IqI30bEEWBdGmtWNHp6enjuuedO2DN47rnn6OnpKXRrZjnJ6ZyBpFJJLwBvAJuB/wN0RcSxNKQDmJ6mpwN7ANLyA8AF2fWT1jld3ayoLFiwgFtuuYXy8nJuueUWFixYUOiWzHKW020vI6IX+ICkScDjwEB3+e4/3qTTLDtdfaBAGvDYlaQVwAqAyspK2trazty42TDauHEjN998M/PmzePmm29m48aNAP6cWlEY0j2QI6JLUhtwFTBJUln67X8GsDcN6wAuBjoklQHvBvZn1ftlr3O6+smvvxpYDZlzBr61oI0U/ecI7r//fu655x7GjBnD1VdfzbZt23wLTCsKuVxNNCXtESBpLPARoB1oBT6Whi0Dfpim16d50vIfR+Ys9XpgSbra6L3ALODnZE4Yz0pXJ40GlqSxZkVj+fLlPPPMM0yaNAmASZMm8cwzz7B8+fICd2aWm1zOGUwFWiW9ROYH9+aI2AjcCtwiaReZcwItaXwLcEGq3wLcBhARrwCPAjuBHwGfjojetGfxGWATmZB5NI01Kxpz5sxh/Pjx7N+/H4D9+/czfvx45syZU+DOzHIz6KWlI5UvLbWRpKamhm9+85vU19fT1tZGXV0dra2tNDY2smPHjkK3Zwa8zUtLzWxw7e3tdHR0UFNTw/z586mpqaGjo4P29vZCt2aWkyGdQDazgU2bNo1bb72Vhx9+mN7eXkpLS/n4xz/OtGnTCt2aWU68Z2CWJycfci3WQ7B2fvKegVke7N27l9mzZzNv3rzjtZqaGnbuPOO3qpiNGN4zMMuDsWPHsmPHDlauXMmGDRtYuXIlO3bsYOzYsYVuzSwnDgOzPOju7mbChAksXryY8vJyFi9ezIQJE+ju7i50a2Y58WEiszxZsmQJ11133fFvLf3EJz7BfffdV+i2zHLiPQOzPHnooYeYOnUqJSUlTJ06lYceeqjQLZnlzGFglgdjxozh8OHDXHbZZTz22GNcdtllHD58mDFjxhS6NbOc+DCRWR709PRw+eWXs2HDBtavX48kLr/8cn7xi18UujWznHjPwCxP7rzzTvr6+mhtbaWvr48777yz0C2Z5cx7BmZ5MGPGDBYvXkxFRQW7d+9m5syZdHZ2MmPGjEK3ZpYT7xmY5cGiRYs4cOAAr7/+OhHB66+/zoEDB1i0aNHgK5uNAA4Dszz47ne/C0BFRQWSqKioOKFuNtI5DMzyoLu7m+nTp9PV1UVE0NXVxfTp0/1HZ1Y0fM7ALE9+//vfH5+OiBPmzUY67xmY5dHChQt5/PHHWbhwYaFbMRsS7xmY5dFPf/pTNmzYcPxeyGbFwnsGZnkybdq0E84Z+MY2VkwcBmZ5snfvXiZNmkRJSQmTJk1i7969hW7JLGcOA7M8uPTSSwHo7Oykr6+Pzs7OE+pmI53DwCwPfvOb3wypbjbSOAzM8qCnpweAiy66iJKSEi666KIT6mYjna8mMsuTCRMm8L3vfY/e3l5KS0tZuHAhBw8eLHRbZjnxnoFZnhw5cuSM82YjmfcMzPLkyJEjzJs3r9BtmJ0V7xmY5cHp7mjmO51ZsXAYmOXB6U4U+wSyFQuHgVmelJeXn3HebCQbNAwkXSypVVK7pFckfTbVJ0vaLOnV9FyR6pJ0t6Rdkl6SdHnWtpal8a9KWpZVv0LSy2mduyXpXLxZs3Pp8OHDrFy5kg0bNrBy5UoOHz5c6JbMcpbLnsEx4PMRUQ1cBXxa0mzgNmBLRMwCtqR5gOuAWemxArgHMuEB3A58ELgSuL0/QNKYFVnrXfv235rZ8LvkkksoKyvjkksuKXQrZkMy6NVEEbEP2Jem/yipHZgOXA/UpWFrgDbg1lR/MCICeFbSJElT09jNEbEfQNJm4FpJbcDEiHgm1R8EFgFP5uctmg2fz3/+84VuweysDOnSUklVwL8FngMqU1AQEfskvScNmw7syVqtI9XOVO8YoD7Q668gswdBZWUlbW1tQ2nfrCD8ObVikHMYSJoAPAZ8LiLeOsNh/YEWxFnUTy1GrAZWA9TW1kZdXd0gXZsNr4qKCu666y5WrVp1/Mvq/Dm1YpDT1USSRpEJgocj4h9T+Q/p8A/p+Y1U7wAuzlp9BrB3kPqMAepmRWXcuHF0dXWxfPlyurq6GDduXKFbMstZLlcTCWgB2iPiv2ctWg/0XxG0DPhhVv3GdFXRVcCBdDhpE3CNpIp04vgaYFNa9kdJV6XXujFrW2ZF46abbmL27NmUlJQwe/ZsbrrppkK3ZJazXA4TfQj4BPCypBdS7YvAncCjkhqA3wGL07IngAXALuAQ8CmAiNgv6SvA82ncl/tPJgMrgQeAsWROHPvksRWdb33rW3z9619n9uzZ7Ny50yeTragoc9FP8amtrY1t27YVug0zAMrKyujt7WXUqFEcPXr0+HNpaSnHjh0rdHtmAEjaHhG1Ay3zXyCb5UFEMG7cOI4ePQrA0aNHGTduHMX6y5adfxwGZnkwbdo0Ro0aRVVVFSUlJVRVVTFq1CimTZtW6NbMcuKvsDbLg0OHDvHWW29x8OBB+vr62LNnD319fZSWlha6NbOceM/ALA/2789cCzFlyhRKSkqYMmXKCXWzkc5hYJYny5cvZ9++fWzZsoV9+/axfPnyQrdkljMfJjLLk3Xr1vHUU0+xe/duZs6c6b0CKyoOA7M8KCkp4a233qK7u5uIYM+ePfT29lJS4p1vKw7+pJrlwdixYwGYOHEikpg4ceIJdbORzmFglgfd3d1UVVXR2dlJRNDZ2UlVVRXd3d2Fbs0sJz5MZJYne/bsOeHrKFatWlXolsxy5jAwy5O+vr4Tvo/Id2+1YuLDRGZ5EhGUl5cDUF5e7q+isKLiMDDLk3HjxtHT0wNAT0+P72dgRcVhYJYnhw4dYvz48QCMHz+eQ4cOFbgjs9w5DMzy6ODBgyc8mxULh4FZHs2ZM4fvf//7zJkzp9CtmA2JryYyy5OKigqefvppnn766ePznZ2dBe7KLDfeMzDLk66uLiorKwGorKykq6urwB2Z5c5hYJYHkydPJiJ48803AXjzzTeJCCZPnlzgzsxy4zAwy4Nx48YxevTo4/c7PnbsGKNHj/blpVY0fM7ALA86OjpOqR05cmTAutlI5D0DszyRdMI5A38dhRUTh4FZnkQEq1at4sknn2TVqlX+OgorKj5MZJYn1dXVfPGLX6Snp4cxY8ZQXV1Ne3t7odsyy4nDwCxP2tvbj9/Z7OjRow4CKyo+TGSWR319fSc8mxULh4GZmTkMzPKltLT0jPNmI5nDwCxPent7j58zKCkpobe3t8AdmeVu0DCQdL+kNyTtyKpNlrRZ0qvpuSLVJeluSbskvSTp8qx1lqXxr0pallW/QtLLaZ275YuzrYj5nIEVq1z2DB4Arj2pdhuwJSJmAVvSPMB1wKz0WAHcA5nwAG4HPghcCdzeHyBpzIqs9U5+LTMzO8cGDYOI+Amw/6Ty9cCaNL0GWJRVfzAyngUmSZoKfBTYHBH7I6IT2Axcm5ZNjIhnIvMXOg9mbcvMzIbJ2f6dQWVE7AOIiH2S3pPq04E9WeM6Uu1M9Y4B6gOStILMXgSVlZW0tbWdZftm58aoUaM4evTo8WfAn1MrCvn+o7OBjvfHWdQHFBGrgdUAtbW1UVdXdxYtmp07/QHQ/wzgz6kVg7O9mugP6RAP6fmNVO8ALs4aNwPYO0h9xgB1MzMbRmcbBuuB/iuClgE/zKrfmK4qugo4kA4nbQKukVSRThxfA2xKy/4o6ap0FdGNWdsyM7NhMuhhIklrgTrgQkkdZK4KuhN4VFID8DtgcRr+BLAA2AUcAj4FEBH7JX0FeD6N+3JE9J+UXknmiqWxwJPpYWZmw0jF+jW7tbW1sW3btkK3YQZwxnsXFOv/Y/bOI2l7RNQOtMx/gWxmZg4DMzNzGJiZGQ4DMzPDYWBmZjgMzMwMh4GZmeEwMDMzHAZmZobDwMzMcBiYmRkOAzMzw2FgZmY4DMzMDIeBmZnhMDAzMxwGZmaGw8DMzHAYmJkZDgMzM8NhYGZmOAzMzAyHgZmZ4TAwMzMcBmZmhsPAzMxwGJiZGQ4DMzPDYWBmZoygMJB0raRfS9ol6bZC92Nmdj4ZEWEgqRT4B+A6YDawVNLswnZlZnb+KCt0A8mVwK6I+C2ApHXA9cDOgnZl5z1Jw7aNiHjbr2V2tkZKGEwH9mTNdwAfPHmQpBXACoDKykra2tqGpTl752jc3Tik8TUP1JyjTk516ZpLhzT+mzO/eY46sfPRSAmDgX51OuXXpIhYDawGqK2tjbq6unPclr3TvMzL52S7Z/rt37/xWzEYEecMyOwJXJw1PwPYW6BezIbsdD/wHQRWLEZKGDwPzJL0XkmjgSXA+gL3ZDYkEUFE0NraenzarFiMiMNEEXFM0meATUApcH9EvFLgtszMzhsjIgwAIuIJ4IlC92Fmdj4aKYeJzMysgBwGZmbmMDAzM4eBmZkBKtbL3yT9X2B3ofswG8CFwD8XugmzAcyMiCkDLSjaMDAbqSRti4jaQvdhNhQ+TGRmZg4DMzNzGJidC6sL3YDZUPmcgZmZec/AzMwcBmZmhsPAzMxwGFiRk/QBSQtyGHfwbb7O65IufDvbGA6SFkmaXeg+rPg4DKzYfQAYNAxGMkn5/Cr5RYDDwIbMYWAFJ6lK0q8k3Sdph6SHJX1E0s8kvSrpyvR4WtIv0/O/TnfF+zJwg6QXJN0gaYKk70p6WdJLkv5j1us0S3pR0rOSKlNtiqTHJD2fHh9K9QskPZVe7zsMfJ/uk/tfk17zB5LGpWVXSPonSdslbZI0NdXbJN0h6Z+Az0qqlPR46u9FSXPSuP8k6efp/X1HUmmqHzz5/aR1FgJ/m8a/T9Ly9L5eTO+zv6/3pfWel/Tl7D0nSX+V6i9J+lI+/61tBOu/PZ8ffhTqAVQBx4BLyfyCsh24n8wP4OuB/wlMBMrS+I8Aj6XpTwLfytrW14C/z5qvSM8B/Ps0fRfw12n6e8DcNP0nQHuavhv4r2n6z9L6F56h/wA+lObvB/4zMAp4GpiS6jeQuYsfQBvw7axtPAJ8Lk2XAu8GqoENwKhU/zZw4yDv5wHgY1nbvSBr+qtAY5reCCxN038BHEzT15D5Owmlf4uNwIcL/Rnx49w/Rsydzuy891pEvAwg6RVgS0SEpJfJ/LB9N7BG0iwyPwhHnWY7HyFzD20AIqIzTR4h84MNMmHzp1njZ0vHf/GfKOldwIeB/5C28b8k9W/ndPZExM/S9EPAXwI/AmqAzWn7pcC+rHUeyZqeB9yYXq8XOCDpE8AVwPNp/bHAG4O8n5PVSPoqMAmYQObWsgBXkzmkBJlA/G9p+pr0+GWanwDMAn5yxndvRc9hYCNFT9Z0X9Z8H5nP6VeA1oj4c0lVZH6zHojIhMXJjkZEf72X///ZLwGujoh/OWEjmR++Q/mLzJPHRurllYi4+jTrdA+yTQFrIuILAyw73fs52QPAooh4UdIngbocXvNvIuI7g4yzdxifM7Bi8W7g92n6k1n1PwLvypp/CvhM/4ykikG2e/L4D6TJnwAfT7XrgMG28yeS+n/oLwW2Ar8GpvTXJY2S9P7TrL8FWJnGlUqamGofk/SeVJ8saeYgfZz83+NdwD5Jo/rfT/Is0H8+ZUlWfRNwk6QJ6TWn97++vbM5DKxY3AX8jaSfkTnc0q+VzGGeFyTdQOa4eEU6Ef0iUD/Idv8SqE0nS3eSOX4O8CXgw5J+Qeawye8G2U47sEzSS8Bk4J6IOAJ8DPha6uUFYM5p1v8sUJ8Oi20H3h8RO4G/Bp5K290MTB2kj3XAX6UT3+8D/gvwXFr3V1njPgfcIunnaZsHACLiKTKHjZ5JvfyAE8PF3qH83URmb1M6bLUxImoK3ErO0lVF/5LOyywhczL5+kL3ZYXjcwZm56crgG8pc3KkC7ipwP1YgXnPwCxHki4gcxz/ZPMj4s3h7scsnxwGZmbmE8hmZuYwMDMzHAZmZobDwMzMgP8H1+iXEeeFRCUAAAAASUVORK5CYII=\n","text/plain":"<Figure size 432x288 with 1 Axes>"},"metadata":{"needs_background":"light"},"output_type":"display_data"}]},{"metadata":{"trusted":false},"cell_type":"code","source":"df[df['matched_percentage']>30000].head(5)","execution_count":11,"outputs":[{"data":{"text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>claim_status</th>\n <th>views</th>\n <th>matching_duration</th>\n <th>claim_created_date</th>\n <th>video_duration_sec</th>\n <th>matched_percentage</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>139145</th>\n <td>ACTIVE</td>\n <td>119</td>\n <td>170</td>\n <td>2020/12/16</td>\n <td>0</td>\n <td>inf</td>\n </tr>\n <tr>\n <th>214743</th>\n <td>ACTIVE</td>\n <td>44</td>\n <td>323</td>\n <td>2015/01/26</td>\n <td>0</td>\n <td>inf</td>\n </tr>\n <tr>\n <th>398312</th>\n <td>ACTIVE</td>\n <td>44</td>\n <td>374</td>\n <td>2014/08/25</td>\n <td>0</td>\n <td>inf</td>\n </tr>\n <tr>\n <th>572831</th>\n <td>ACTIVE</td>\n <td>8</td>\n <td>205</td>\n <td>2016/08/02</td>\n <td>0</td>\n <td>inf</td>\n </tr>\n <tr>\n <th>706394</th>\n <td>ACTIVE</td>\n <td>0</td>\n <td>555</td>\n <td>2016/11/14</td>\n <td>0</td>\n <td>inf</td>\n </tr>\n </tbody>\n</table>\n</div>","text/plain":" claim_status views matching_duration claim_created_date \\\n139145 ACTIVE 119 170 2020/12/16 \n214743 ACTIVE 44 323 2015/01/26 \n398312 ACTIVE 44 374 2014/08/25 \n572831 ACTIVE 8 205 2016/08/02 \n706394 ACTIVE 0 555 2016/11/14 \n\n video_duration_sec matched_percentage \n139145 0 inf \n214743 0 inf \n398312 0 inf \n572831 0 inf \n706394 0 inf "},"execution_count":11,"metadata":{},"output_type":"execute_result"}]},{"metadata":{},"cell_type":"markdown","source":"This is clearly a problem. We'll need to exclude any records w/o a proper video duration field."},{"metadata":{"trusted":false},"cell_type":"code","source":"df = df[df['video_duration_sec']>0]\ndf.head()","execution_count":13,"outputs":[{"data":{"text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>claim_status</th>\n <th>views</th>\n <th>matching_duration</th>\n <th>claim_created_date</th>\n <th>video_duration_sec</th>\n <th>matched_percentage</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>ACTIVE</td>\n <td>35</td>\n <td>0</td>\n <td>2020/09/08</td>\n <td>148</td>\n <td>0.0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>ACTIVE</td>\n <td>0</td>\n <td>0</td>\n <td>2020/06/27</td>\n <td>109</td>\n <td>0.0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>ACTIVE</td>\n <td>743</td>\n <td>66</td>\n <td>2020/08/17</td>\n <td>215</td>\n <td>31.0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>ACTIVE</td>\n <td>221</td>\n <td>94</td>\n <td>2020/08/16</td>\n <td>215</td>\n <td>44.0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>ACTIVE</td>\n <td>3</td>\n <td>0</td>\n <td>2020/03/21</td>\n <td>185</td>\n <td>0.0</td>\n </tr>\n </tbody>\n</table>\n</div>","text/plain":" claim_status views matching_duration claim_created_date \\\n0 ACTIVE 35 0 2020/09/08 \n1 ACTIVE 0 0 2020/06/27 \n2 ACTIVE 743 66 2020/08/17 \n3 ACTIVE 221 94 2020/08/16 \n4 ACTIVE 3 0 2020/03/21 \n\n video_duration_sec matched_percentage \n0 148 0.0 \n1 109 0.0 \n2 215 31.0 \n3 215 44.0 \n4 185 0.0 "},"execution_count":13,"metadata":{},"output_type":"execute_result"}]},{"metadata":{"trusted":false},"cell_type":"code","source":"df.boxplot(column=['matched_percentage'])","execution_count":14,"outputs":[{"data":{"text/plain":"<matplotlib.axes._subplots.AxesSubplot at 0x1fdadc78358>"},"execution_count":14,"metadata":{},"output_type":"execute_result"},{"data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAYMAAAD5CAYAAADFqlkBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAaY0lEQVR4nO3df3TV9Z3n8ecrCRB+lBKURn44pKeye0Kj29Ucq5TTk0DHKrOLzG45wulWWnPgDLaZ9tgdtM3sevojjnW2O1PbqZZjrLha0NZxC6wWWZpMS/1RofUHkrayVUoKp+5IgiUMAZL3/nE/YS8QyA1ecnPl9Tjnnvv9vr+f7/e+r1zzyvdH7lcRgZmZnd9KCt2AmZkVnsPAzMwcBmZm5jAwMzMcBmZmBpQVuoGzdeGFF0ZVVVWh2zA7RXd3N+PHjy90G2an2L59+z9HxJSBlhVtGFRVVbFt27ZCt2F2ira2Nurq6grdhtkpJO0+3TIfJjIzM4eBmZk5DMzMDIeBmZnhMDAzMxwGZnmzdu1aampqmD9/PjU1Naxdu7bQLZnlrGgvLTUbSdauXUtTUxMtLS309vZSWlpKQ0MDAEuXLi1wd2aD856BWR40NzfT0tJCfX09ZWVl1NfX09LSQnNzc6FbM8uJw8AsD9rb25k7d+4Jtblz59Le3l6gjsyGxmFglgfV1dVs3br1hNrWrVuprq4uUEdmQ+MwMMuDpqYmGhoaaG1t5dixY7S2ttLQ0EBTU1OhWzPLiU8gm+VB/0nixsZG2tvbqa6uprm52SePrWioWO+BXFtbG/6iOhuJ/EV1NlJJ2h4RtQMt82EiMzPLLQwkvS7pZUkvSNqWapMlbZb0anquSHVJulvSLkkvSbo8azvL0vhXJS3Lql+Rtr8rrat8v1EzMzu9oewZ1EfEB7J2MW4DtkTELGBLmge4DpiVHiuAeyATHsDtwAeBK4Hb+wMkjVmRtd61Z/2OzMxsyN7OYaLrgTVpeg2wKKv+YGQ8C0ySNBX4KLA5IvZHRCewGbg2LZsYEc9E5gTGg1nbMjOzYZDr1UQBPCUpgO9ExGqgMiL2AUTEPknvSWOnA3uy1u1ItTPVOwaon0LSCjJ7EFRWVtLW1pZj+2bD5+DBg/5sWtHJNQw+FBF70w/8zZJ+dYaxAx3vj7Oon1rMhNBqyFxN5Cs2bCTy1URWjHI6TBQRe9PzG8DjZI75/yEd4iE9v5GGdwAXZ60+A9g7SH3GAHUzMxsmg4aBpPGS3tU/DVwD7ADWA/1XBC0Dfpim1wM3pquKrgIOpMNJm4BrJFWkE8fXAJvSsj9KuipdRXRj1rbMzGwY5HKYqBJ4PF3tWQZ8LyJ+JOl54FFJDcDvgMVp/BPAAmAXcAj4FEBE7Jf0FeD5NO7LEbE/Ta8EHgDGAk+mh5mZDZNBwyAifgv8mwHqbwLzB6gH8OnTbOt+4P4B6tuAmhz6NTOzc8B/gWxmZg4DMzNzGJiZGQ4DMzPDYWBmZjgMzMwMh4GZmeEwMDMzHAZmZobDwMzMcBiYmRkOAzMzw2FgZmY4DMzMDIeBmZnhMDAzMxwGZmaGw8DMzHAYmJkZDgMzM8NhYGZmOAzMzAyHgZmZ4TAwMzMcBmZmhsPAzMxwGJiZGQ4DMzPDYWBmZgwhDCSVSvqlpI1p/r2SnpP0qqRHJI1O9TFpfldaXpW1jS+k+q8lfTSrfm2q7ZJ0W/7enpmZ5WIoewafBdqz5r8G/F1EzAI6gYZUbwA6I+IS4O/SOCTNBpYA7weuBb6dAqYU+AfgOmA2sDSNNTOzYZJTGEiaAfwZcF+aFzAP+EEasgZYlKavT/Ok5fPT+OuBdRHRExGvAbuAK9NjV0T8NiKOAOvSWDMzGyZlOY77e2AV8K40fwHQFRHH0nwHMD1NTwf2AETEMUkH0vjpwLNZ28xeZ89J9Q8O1ISkFcAKgMrKStra2nJs32z4HDx40J9NKzqDhoGkfwe8ERHbJdX1lwcYGoMsO119oL2TGKBGRKwGVgPU1tZGXV3dQMPMCqqtrQ1/Nq3Y5LJn8CFgoaQFQDkwkcyewiRJZWnvYAawN43vAC4GOiSVAe8G9mfV+2Wvc7q6mZkNg0HPGUTEFyJiRkRUkTkB/OOI+DjQCnwsDVsG/DBNr0/zpOU/johI9SXpaqP3ArOAnwPPA7PS1Umj02usz8u7MzOznOR6zmAgtwLrJH0V+CXQkuotwP+QtIvMHsESgIh4RdKjwE7gGPDpiOgFkPQZYBNQCtwfEa+8jb7MzGyIhhQGEdEGtKXp35K5EujkMYeBxadZvxloHqD+BPDEUHoxM7P88V8gm5mZw8DMzBwGZnmzdu1aampqmD9/PjU1Naxdu7bQLZnl7O2cQDazZO3atTQ1NdHS0kJvby+lpaU0NGS+oWXp0qUF7s5scN4zMMuD5uZmWlpaqK+vp6ysjPr6elpaWmhuPuV6CbMRyWFglgft7e3MnTv3hNrcuXNpb28/zRpmI4vDwCwPqqur2bp16wm1rVu3Ul1dXaCOzIbG5wzM8qCpqYkbbriB8ePHs3v3bmbOnEl3dzff+MY3Ct2aWU68Z2CWZ5lvbDcrLg4Dszxobm7mkUce4bXXXmPLli289tprPPLIIz6BbEXDYWCWBz6BbMXOYWCWBz6BbMXOYWCWB01NTTQ0NNDa2sqxY8dobW2loaGBpqamQrdmlhNfTWSWB/1/ZdzY2Eh7ezvV1dU0Nzf7r4+taChz35niU1tbG9u2bSt0G2an8G0vbaSStD0iagda5sNEZmbmMDAzM4eBmZnhMDAzMxwGZmaGw8DMzHAYmJkZDgMzM8NhYGZmOAzMzAyHgZmZ4TAwMzMcBmZ509jYSHl5OfX19ZSXl9PY2FjolsxyNmgYSCqX9HNJL0p6RdKXUv29kp6T9KqkRySNTvUxaX5XWl6Vta0vpPqvJX00q35tqu2SdFv+36bZudXY2Mi9997LHXfcwZNPPskdd9zBvffe60CwojHoV1grc3fv8RFxUNIoYCvwWeAW4B8jYp2ke4EXI+IeSTcDl0XEX0haAvx5RNwgaTawFrgSmAb8b+BfpZf5DfCnQAfwPLA0InaeqS9/hbWNJOXl5dTW1rJt2zZ6enoYM2bM8fnDhw8Xuj0z4G1+hXVkHEyzo9IjgHnAD1J9DbAoTV+f5knL56dAuR5YFxE9EfEasItMMFwJ7IqI30bEEWBdGmtWNHp6enjuuedO2DN47rnn6OnpKXRrZjnJ6ZyBpFJJLwBvAJuB/wN0RcSxNKQDmJ6mpwN7ANLyA8AF2fWT1jld3ayoLFiwgFtuuYXy8nJuueUWFixYUOiWzHKW020vI6IX+ICkScDjwEB3+e4/3qTTLDtdfaBAGvDYlaQVwAqAyspK2trazty42TDauHEjN998M/PmzePmm29m48aNAP6cWlEY0j2QI6JLUhtwFTBJUln67X8GsDcN6wAuBjoklQHvBvZn1ftlr3O6+smvvxpYDZlzBr61oI0U/ecI7r//fu655x7GjBnD1VdfzbZt23wLTCsKuVxNNCXtESBpLPARoB1oBT6Whi0Dfpim16d50vIfR+Ys9XpgSbra6L3ALODnZE4Yz0pXJ40GlqSxZkVj+fLlPPPMM0yaNAmASZMm8cwzz7B8+fICd2aWm1zOGUwFWiW9ROYH9+aI2AjcCtwiaReZcwItaXwLcEGq3wLcBhARrwCPAjuBHwGfjojetGfxGWATmZB5NI01Kxpz5sxh/Pjx7N+/H4D9+/czfvx45syZU+DOzHIz6KWlI5UvLbWRpKamhm9+85vU19fT1tZGXV0dra2tNDY2smPHjkK3Zwa8zUtLzWxw7e3tdHR0UFNTw/z586mpqaGjo4P29vZCt2aWkyGdQDazgU2bNo1bb72Vhx9+mN7eXkpLS/n4xz/OtGnTCt2aWU68Z2CWJycfci3WQ7B2fvKegVke7N27l9mzZzNv3rzjtZqaGnbuPOO3qpiNGN4zMMuDsWPHsmPHDlauXMmGDRtYuXIlO3bsYOzYsYVuzSwnDgOzPOju7mbChAksXryY8vJyFi9ezIQJE+ju7i50a2Y58WEiszxZsmQJ11133fFvLf3EJz7BfffdV+i2zHLiPQOzPHnooYeYOnUqJSUlTJ06lYceeqjQLZnlzGFglgdjxozh8OHDXHbZZTz22GNcdtllHD58mDFjxhS6NbOc+DCRWR709PRw+eWXs2HDBtavX48kLr/8cn7xi18UujWznHjPwCxP7rzzTvr6+mhtbaWvr48777yz0C2Z5cx7BmZ5MGPGDBYvXkxFRQW7d+9m5syZdHZ2MmPGjEK3ZpYT7xmY5cGiRYs4cOAAr7/+OhHB66+/zoEDB1i0aNHgK5uNAA4Dszz47ne/C0BFRQWSqKioOKFuNtI5DMzyoLu7m+nTp9PV1UVE0NXVxfTp0/1HZ1Y0fM7ALE9+//vfH5+OiBPmzUY67xmY5dHChQt5/PHHWbhwYaFbMRsS7xmY5dFPf/pTNmzYcPxeyGbFwnsGZnkybdq0E84Z+MY2VkwcBmZ5snfvXiZNmkRJSQmTJk1i7969hW7JLGcOA7M8uPTSSwHo7Oykr6+Pzs7OE+pmI53DwCwPfvOb3wypbjbSOAzM8qCnpweAiy66iJKSEi666KIT6mYjna8mMsuTCRMm8L3vfY/e3l5KS0tZuHAhBw8eLHRbZjnxnoFZnhw5cuSM82YjmfcMzPLkyJEjzJs3r9BtmJ0V7xmY5cHp7mjmO51ZsXAYmOXB6U4U+wSyFQuHgVmelJeXn3HebCQbNAwkXSypVVK7pFckfTbVJ0vaLOnV9FyR6pJ0t6Rdkl6SdHnWtpal8a9KWpZVv0LSy2mduyXpXLxZs3Pp8OHDrFy5kg0bNrBy5UoOHz5c6JbMcpbLnsEx4PMRUQ1cBXxa0mzgNmBLRMwCtqR5gOuAWemxArgHMuEB3A58ELgSuL0/QNKYFVnrXfv235rZ8LvkkksoKyvjkksuKXQrZkMy6NVEEbEP2Jem/yipHZgOXA/UpWFrgDbg1lR/MCICeFbSJElT09jNEbEfQNJm4FpJbcDEiHgm1R8EFgFP5uctmg2fz3/+84VuweysDOnSUklVwL8FngMqU1AQEfskvScNmw7syVqtI9XOVO8YoD7Q668gswdBZWUlbW1tQ2nfrCD8ObVikHMYSJoAPAZ8LiLeOsNh/YEWxFnUTy1GrAZWA9TW1kZdXd0gXZsNr4qKCu666y5WrVp1/Mvq/Dm1YpDT1USSRpEJgocj4h9T+Q/p8A/p+Y1U7wAuzlp9BrB3kPqMAepmRWXcuHF0dXWxfPlyurq6GDduXKFbMstZLlcTCWgB2iPiv2ctWg/0XxG0DPhhVv3GdFXRVcCBdDhpE3CNpIp04vgaYFNa9kdJV6XXujFrW2ZF46abbmL27NmUlJQwe/ZsbrrppkK3ZJazXA4TfQj4BPCypBdS7YvAncCjkhqA3wGL07IngAXALuAQ8CmAiNgv6SvA82ncl/tPJgMrgQeAsWROHPvksRWdb33rW3z9619n9uzZ7Ny50yeTragoc9FP8amtrY1t27YVug0zAMrKyujt7WXUqFEcPXr0+HNpaSnHjh0rdHtmAEjaHhG1Ay3zXyCb5UFEMG7cOI4ePQrA0aNHGTduHMX6y5adfxwGZnkwbdo0Ro0aRVVVFSUlJVRVVTFq1CimTZtW6NbMcuKvsDbLg0OHDvHWW29x8OBB+vr62LNnD319fZSWlha6NbOceM/ALA/2789cCzFlyhRKSkqYMmXKCXWzkc5hYJYny5cvZ9++fWzZsoV9+/axfPnyQrdkljMfJjLLk3Xr1vHUU0+xe/duZs6c6b0CKyoOA7M8KCkp4a233qK7u5uIYM+ePfT29lJS4p1vKw7+pJrlwdixYwGYOHEikpg4ceIJdbORzmFglgfd3d1UVVXR2dlJRNDZ2UlVVRXd3d2Fbs0sJz5MZJYne/bsOeHrKFatWlXolsxy5jAwy5O+vr4Tvo/Id2+1YuLDRGZ5EhGUl5cDUF5e7q+isKLiMDDLk3HjxtHT0wNAT0+P72dgRcVhYJYnhw4dYvz48QCMHz+eQ4cOFbgjs9w5DMzy6ODBgyc8mxULh4FZHs2ZM4fvf//7zJkzp9CtmA2JryYyy5OKigqefvppnn766ePznZ2dBe7KLDfeMzDLk66uLiorKwGorKykq6urwB2Z5c5hYJYHkydPJiJ48803AXjzzTeJCCZPnlzgzsxy4zAwy4Nx48YxevTo4/c7PnbsGKNHj/blpVY0fM7ALA86OjpOqR05cmTAutlI5D0DszyRdMI5A38dhRUTh4FZnkQEq1at4sknn2TVqlX+OgorKj5MZJYn1dXVfPGLX6Snp4cxY8ZQXV1Ne3t7odsyy4nDwCxP2tvbj9/Z7OjRow4CKyo+TGSWR319fSc8mxULh4GZmTkMzPKltLT0jPNmI5nDwCxPent7j58zKCkpobe3t8AdmeVu0DCQdL+kNyTtyKpNlrRZ0qvpuSLVJeluSbskvSTp8qx1lqXxr0pallW/QtLLaZ275YuzrYj5nIEVq1z2DB4Arj2pdhuwJSJmAVvSPMB1wKz0WAHcA5nwAG4HPghcCdzeHyBpzIqs9U5+LTMzO8cGDYOI+Amw/6Ty9cCaNL0GWJRVfzAyngUmSZoKfBTYHBH7I6IT2Axcm5ZNjIhnIvMXOg9mbcvMzIbJ2f6dQWVE7AOIiH2S3pPq04E9WeM6Uu1M9Y4B6gOStILMXgSVlZW0tbWdZftm58aoUaM4evTo8WfAn1MrCvn+o7OBjvfHWdQHFBGrgdUAtbW1UVdXdxYtmp07/QHQ/wzgz6kVg7O9mugP6RAP6fmNVO8ALs4aNwPYO0h9xgB1MzMbRmcbBuuB/iuClgE/zKrfmK4qugo4kA4nbQKukVSRThxfA2xKy/4o6ap0FdGNWdsyM7NhMuhhIklrgTrgQkkdZK4KuhN4VFID8DtgcRr+BLAA2AUcAj4FEBH7JX0FeD6N+3JE9J+UXknmiqWxwJPpYWZmw0jF+jW7tbW1sW3btkK3YQZwxnsXFOv/Y/bOI2l7RNQOtMx/gWxmZg4DMzNzGJiZGQ4DMzPDYWBmZjgMzMwMh4GZmeEwMDMzHAZmZobDwMzMcBiYmRkOAzMzw2FgZmY4DMzMDIeBmZnhMDAzMxwGZmaGw8DMzHAYmJkZDgMzM8NhYGZmOAzMzAyHgZmZ4TAwMzMcBmZmhsPAzMxwGJiZGQ4DMzPDYWBmZoygMJB0raRfS9ol6bZC92Nmdj4ZEWEgqRT4B+A6YDawVNLswnZlZnb+KCt0A8mVwK6I+C2ApHXA9cDOgnZl5z1Jw7aNiHjbr2V2tkZKGEwH9mTNdwAfPHmQpBXACoDKykra2tqGpTl752jc3Tik8TUP1JyjTk516ZpLhzT+mzO/eY46sfPRSAmDgX51OuXXpIhYDawGqK2tjbq6unPclr3TvMzL52S7Z/rt37/xWzEYEecMyOwJXJw1PwPYW6BezIbsdD/wHQRWLEZKGDwPzJL0XkmjgSXA+gL3ZDYkEUFE0NraenzarFiMiMNEEXFM0meATUApcH9EvFLgtszMzhsjIgwAIuIJ4IlC92Fmdj4aKYeJzMysgBwGZmbmMDAzM4eBmZkBKtbL3yT9X2B3ofswG8CFwD8XugmzAcyMiCkDLSjaMDAbqSRti4jaQvdhNhQ+TGRmZg4DMzNzGJidC6sL3YDZUPmcgZmZec/AzMwcBmZmhsPAzMxwGFiRk/QBSQtyGHfwbb7O65IufDvbGA6SFkmaXeg+rPg4DKzYfQAYNAxGMkn5/Cr5RYDDwIbMYWAFJ6lK0q8k3Sdph6SHJX1E0s8kvSrpyvR4WtIv0/O/TnfF+zJwg6QXJN0gaYKk70p6WdJLkv5j1us0S3pR0rOSKlNtiqTHJD2fHh9K9QskPZVe7zsMfJ/uk/tfk17zB5LGpWVXSPonSdslbZI0NdXbJN0h6Z+Az0qqlPR46u9FSXPSuP8k6efp/X1HUmmqHzz5/aR1FgJ/m8a/T9Ly9L5eTO+zv6/3pfWel/Tl7D0nSX+V6i9J+lI+/61tBOu/PZ8ffhTqAVQBx4BLyfyCsh24n8wP4OuB/wlMBMrS+I8Aj6XpTwLfytrW14C/z5qvSM8B/Ps0fRfw12n6e8DcNP0nQHuavhv4r2n6z9L6F56h/wA+lObvB/4zMAp4GpiS6jeQuYsfQBvw7axtPAJ8Lk2XAu8GqoENwKhU/zZw4yDv5wHgY1nbvSBr+qtAY5reCCxN038BHEzT15D5Owmlf4uNwIcL/Rnx49w/Rsydzuy891pEvAwg6RVgS0SEpJfJ/LB9N7BG0iwyPwhHnWY7HyFzD20AIqIzTR4h84MNMmHzp1njZ0vHf/GfKOldwIeB/5C28b8k9W/ndPZExM/S9EPAXwI/AmqAzWn7pcC+rHUeyZqeB9yYXq8XOCDpE8AVwPNp/bHAG4O8n5PVSPoqMAmYQObWsgBXkzmkBJlA/G9p+pr0+GWanwDMAn5yxndvRc9hYCNFT9Z0X9Z8H5nP6VeA1oj4c0lVZH6zHojIhMXJjkZEf72X///ZLwGujoh/OWEjmR++Q/mLzJPHRurllYi4+jTrdA+yTQFrIuILAyw73fs52QPAooh4UdIngbocXvNvIuI7g4yzdxifM7Bi8W7g92n6k1n1PwLvypp/CvhM/4ykikG2e/L4D6TJnwAfT7XrgMG28yeS+n/oLwW2Ar8GpvTXJY2S9P7TrL8FWJnGlUqamGofk/SeVJ8saeYgfZz83+NdwD5Jo/rfT/Is0H8+ZUlWfRNwk6QJ6TWn97++vbM5DKxY3AX8jaSfkTnc0q+VzGGeFyTdQOa4eEU6Ef0iUD/Idv8SqE0nS3eSOX4O8CXgw5J+Qeawye8G2U47sEzSS8Bk4J6IOAJ8DPha6uUFYM5p1v8sUJ8Oi20H3h8RO4G/Bp5K290MTB2kj3XAX6UT3+8D/gvwXFr3V1njPgfcIunnaZsHACLiKTKHjZ5JvfyAE8PF3qH83URmb1M6bLUxImoK3ErO0lVF/5LOyywhczL5+kL3ZYXjcwZm56crgG8pc3KkC7ipwP1YgXnPwCxHki4gcxz/ZPMj4s3h7scsnxwGZmbmE8hmZuYwMDMzHAZmZobDwMzMgP8H1+iXEeeFRCUAAAAASUVORK5CYII=\n","text/plain":"<Figure size 432x288 with 1 Axes>"},"metadata":{"needs_background":"light"},"output_type":"display_data"}]},{"metadata":{"trusted":false},"cell_type":"code","source":"df[df['matched_percentage']>30000].head(5)","execution_count":15,"outputs":[{"data":{"text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>claim_status</th>\n <th>views</th>\n <th>matching_duration</th>\n <th>claim_created_date</th>\n <th>video_duration_sec</th>\n <th>matched_percentage</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>8331853</th>\n <td>ACTIVE</td>\n <td>44</td>\n <td>304</td>\n <td>2017/05/18</td>\n <td>1</td>\n <td>30400.0</td>\n </tr>\n <tr>\n <th>8373434</th>\n <td>ACTIVE</td>\n <td>50</td>\n <td>303</td>\n <td>2018/02/01</td>\n <td>1</td>\n <td>30300.0</td>\n </tr>\n <tr>\n <th>9791067</th>\n <td>ACTIVE</td>\n <td>58</td>\n <td>357</td>\n <td>2018/09/12</td>\n <td>1</td>\n <td>35700.0</td>\n </tr>\n <tr>\n <th>27486070</th>\n <td>ACTIVE</td>\n <td>230</td>\n <td>371</td>\n <td>2018/08/13</td>\n <td>1</td>\n <td>37100.0</td>\n </tr>\n <tr>\n <th>31320903</th>\n <td>ACTIVE</td>\n <td>110</td>\n <td>523</td>\n <td>2016/03/31</td>\n <td>1</td>\n <td>52300.0</td>\n </tr>\n </tbody>\n</table>\n</div>","text/plain":" claim_status views matching_duration claim_created_date \\\n8331853 ACTIVE 44 304 2017/05/18 \n8373434 ACTIVE 50 303 2018/02/01 \n9791067 ACTIVE 58 357 2018/09/12 \n27486070 ACTIVE 230 371 2018/08/13 \n31320903 ACTIVE 110 523 2016/03/31 \n\n video_duration_sec matched_percentage \n8331853 1 30400.0 \n8373434 1 30300.0 \n9791067 1 35700.0 \n27486070 1 37100.0 \n31320903 1 52300.0 "},"execution_count":15,"metadata":{},"output_type":"execute_result"}]},{"metadata":{"trusted":false},"cell_type":"code","source":"print(len(df[df['matched_percentage']>100]))\nprint(len(df[df['matched_percentage']<=100]))","execution_count":17,"outputs":[{"name":"stdout","output_type":"stream","text":"3606213\n27758762\n"}]},{"metadata":{},"cell_type":"markdown","source":"Most of the data shows the video duration as being less than or equal to the \"matching_duration\" value. Odd to see so many with > 100%..."},{"metadata":{"trusted":false},"cell_type":"code","source":"results = pd.DataFrame(columns=['Cohort','Views'])\ncumulative_total = 0\nthis_total = 0\nfor i in range(101):\n this_total = df[(df['matched_percentage'] >= i-1)&(df['matched_percentage'] < i)]['views'].sum()\n cumulative_total += this_total\n #print(\"In the {ix} - {i}% match group, there are {v} claimed views. Cumulative total: {cv}\".format(ix = i-1, i=i, v=this_total, cv=cumulative_total))\n results = results.append({'Cohort':\"{ix} - {i}\".format(ix = i-1, i=i),'Views':this_total},ignore_index=True)","execution_count":20,"outputs":[]},{"metadata":{"trusted":false},"cell_type":"code","source":"results.to_csv('results.csv',index=False)","execution_count":22,"outputs":[]},{"metadata":{"trusted":false},"cell_type":"code","source":"# Only 2020 onward\n\nresults = pd.DataFrame(columns=['Cohort','Views'])\ncumulative_total = 0\nthis_total = 0\nfor i in range(101):\n this_total = df[(df['claim_created_date']>= '2020')&(df['matched_percentage'] >= i-1)&(df['matched_percentage'] < i)]['views'].sum()\n cumulative_total += this_total\n #print(\"In the {ix} - {i}% match group, there are {v} claimed views. Cumulative total: {cv}\".format(ix = i-1, i=i, v=this_total, cv=cumulative_total))\n results = results.append({'Cohort':\"{ix} - {i}\".format(ix = i-1, i=i),'Views':this_total},ignore_index=True)","execution_count":23,"outputs":[]},{"metadata":{"trusted":false},"cell_type":"code","source":"results.to_csv('results2.csv',index=False)","execution_count":25,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"Remaining analysis done in Excel."},{"metadata":{"trusted":false},"cell_type":"code","source":"len(df[(df['claim_created_date']>= '2020')&(df['matched_percentage'] > 1)&(df['matched_percentage'] <= 2)]['views'])","execution_count":29,"outputs":[{"data":{"text/plain":"775537"},"execution_count":29,"metadata":{},"output_type":"execute_result"}]}],"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3","language":"python"},"language_info":{"name":"python","version":"3.6.5","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"toc":{"nav_menu":{},"number_sections":false,"sideBar":false,"skip_h1_title":false,"base_numbering":1,"title_cell":"Table of Contents","title_sidebar":"Contents","toc_cell":false,"toc_position":{},"toc_section_display":false,"toc_window_display":false}},"nbformat":4,"nbformat_minor":4} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment