We have identified a number of improvements to the Ad Campaign case study on 5/8. Please document in...

Question

We have identified a number of improvements to the Ad Campaign case study on 5/8. Please document in the first cell of the notebook how you suggest to make these improvements and exactly what cells you are modifying to introduce the following changes. Also, document your changes in the source code as well.

1. Campaign duration should not be negative.

2. Day of week visuals should actually display Day of week on x-axis, ticks should be clearly labeled Monday through Sunday and the data should match the tick labels.

3. We should be using the US date format exclusively (not European).

Best Regards,

Shivinder · Accepted Answer

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "1Bs1innK5QPb"
   },
   "source": [
    "# Chapter 9 - Data Science"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Summary of changes:
",
    "
",
    "- Dates are now displayed in the American format, month-day-year (`mm-dd-yyyy`), instead of the European one.
",
    "- Negative campaign durations have been made positive.
",
    "- The days of the week have been added."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "5pNpI9xW5QPc"
   },
   "source": [
    "## 0 - Setting up the notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 406
    },
    "colab_type": "code",
    "id": "Mp3qnAoS5QPe",
    "outputId": "5ac5685b-b5ff-4635-dcbb-7986b1e3d1cb"
   },
   "outputs": [],
   "source": [
    "#!pip install faker
",
    "#!pip install delorean
",
    "
",
    "import json
",
    "import calendar
",
    "import random
",
    "from datetime import date, timedelta, datetime
",
    "import faker
",
    "import numpy as np
",
    "from pandas import DataFrame
",
    "from delorean import parse
",
    "import pandas as pd
",
    "
",
    "# make the graphs nicer
",
    "import matplotlib.pyplot as plt
",
    "plt.style.use('ggplot')
",
    "# see all available with: print(plt.style.available)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "wegaBXNB5QPg"
   },
   "source": [
    "## 1 - Preparing the Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "s_DxNSf55QPh"
   },
   "outputs": [],
   "source": [
    "# create the Faker instance that the cells below use to generate
",
    "# fake names, emails and addresses
",
    "fake = faker.Faker()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "irfx4AJr5QPj"
   },
   "outputs": [],
   "source": [
    "usernames = set()
",
    "usernames_no = 1000
",
    "
",
    "# populate the set with 1000 unique usernames
",
    "while len(usernames) < usernames_no:
",
    "    usernames.add(fake.user_name())
",
    "
",
    "# NOTE(review): the loop body above and the first two lines of this
",
    "# function were lost in the source and are reconstructed here;
",
    "# the skew value should be confirmed.
",
    "def get_random_name_and_gender():
",
    "    # returns a (name, gender) tuple; gender is 'M' or 'F'
",
    "    skew = .6  # share of users that will be female
",
    "    male = random.random() > skew
",
    "    if male:
",
    "        return fake.name_male(), 'M'
",
    "    else:
",
    "        return fake.name_female(), 'F'
",
    "
",
    "# Simulate user data coming from an API: build one complete profile
",
    "# per username and return the profiles as a list of JSON strings.
",
    "def get_users(usernames):
",
    "    def _profile_json(username):
",
    "        # name and gender are drawn together so that they match
",
    "        full_name, sex = get_random_name_and_gender()
",
    "        profile = dict(
",
    "            username=username,
",
    "            name=full_name,
",
    "            gender=sex,
",
    "            email=fake.email(),
",
    "            age=fake.random_int(min=18, max=90),
",
    "            address=fake.address(),
",
    "        )
",
    "        return json.dumps(profile)
",
    "
",
    "    return [_profile_json(username) for username in usernames]
",
    "
",
    "users = get_users(usernames)
",
    "users[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "3cEb9cLG5QPo"
   },
   "outputs": [],
   "source": [
    "# campaign name format:
",
    "# InternalType_StartDate_EndDate_TargetAge_TargetGender_Currency
",
    "def get_type():
",
    "    # pick one of the four made-up internal type codes at random
",
    "    return random.choice(('AKX', 'BYU', 'GRZ', 'KTR'))
",
    "
",
    "def get_start_end_dates():
",
    "    # Returns a (start, end) pair of 'YYYYMMDD' strings. The campaign
",
    "    # lasts between 1 day and 2 * 365 days and starts at most 365 days
",
    "    # before or after today.
",
    "    fmt = '%Y%m%d'
",
    "    length = timedelta(days=random.randint(1, 2 * 365))
",
    "    shift = timedelta(days=random.randint(-365, 365))
",
    "    first_day = date.today() - shift
",
    "    last_day = first_day + length
",
    "    return first_day.strftime(fmt), last_day.strftime(fmt)
",
    "
",
    "def get_age():
",
    "    # Returns a target age range as 'low-high'. Both the lower bound
",
    "    # (drawn from 20..45) and the width (drawn from 5..25) are
",
    "    # rounded down to a multiple of 5.
",
    "    low = random.randint(20, 45)
",
    "    low = (low // 5) * 5
",
    "    width = random.randint(5, 25)
",
    "    width = (width // 5) * 5
",
    "    return '-'.join((str(low), str(low + width)))
",
    "
",
    "def get_gender():
",
    "    # target gender code of a campaign, picked at random
",
    "    genders = ('M', 'F', 'B')
",
    "    return random.choice(genders)
",
    "
",
    "def get_currency():
",
    "    # currency code of a campaign, picked at random
",
    "    currencies = ('GBP', 'EUR', 'USD')
",
    "    return random.choice(currencies)
",
    "
",
    "def get_campaign_name():
",
    "    # Join the six components with underscores, in this order:
",
    "    # type, start date, end date, age range, gender, currency.
",
    "    # The helpers are called in that same order.
",
    "    parts = [get_type()]
",
    "    parts.extend(get_start_end_dates())
",
    "    parts.append(get_age())
",
    "    parts.append(get_gender())
",
    "    parts.append(get_currency())
",
    "    return '_'.join(parts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "ehc8mWDw5QPq"
   },
   "outputs": [],
   "source": [
    "# Generate one campaign record with the keys
",
    "# cmp_name, cmp_bgt, cmp_spent, cmp_clicks and cmp_impr.
",
    "def get_campaign_data():
",
    "    record = {'cmp_name': get_campaign_name()}
",
    "    # budget between 1,000 and 1,000,000; spent between 100 and the budget
",
    "    record['cmp_bgt'] = random.randint(10**3, 10**6)
",
    "    record['cmp_spent'] = random.randint(10**2, record['cmp_bgt'])
",
    "    # clicks: triangular distribution from 100 to 100,000 with mode 20,000
",
    "    record['cmp_clicks'] = int(random.triangular(10**2, 10**5, 0.2 * 10**5))
",
    "    # impressions: gaussian with mean 500,000 and standard deviation 2
",
    "    record['cmp_impr'] = int(random.gauss(0.5 * 10**6, 2))
",
    "    return record"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "FvtOrJPw5QPs"
   },
   "outputs": [],
   "source": [
    "# assemble the logic to get the final version of the rough data
",
    "# data will be a list of dictionaries. Each dictionary will follow
",
    "# this structure:
",
    "# {'user': user_json, 'campaigns': [c1, c2, ...]}
",
    "# where user_json is the JSON string version of a user data dict
",
    "# and c1, c2, ... are campaign dicts as returned by
",
    "# get_campaign_data
",
    "
",
    "def get_data(users):
",
    "    # pair every user with a freshly generated list of 2 to 8 campaigns
",
    "    def _campaigns():
",
    "        how_many = random.randint(2, 8)
",
    "        return [get_campaign_data() for _ in range(how_many)]
",
    "
",
    "    return [{'user': user, 'campaigns': _campaigns()} for user in users]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "Ux9VN0Q15QPu"
   },
   "source": [
    "## 2 - Cleaning the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 994
    },
    "colab_type": "code",
    "id": "7kxPpJCh5QPv",
    "outputId": "d0e63299-c882-42ea-9b30-23a6e8b5a63e"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'user': '{"username": "ashley97", "name": "Miranda Cobb", "gender": "F", "email": "victor34@hotmail.com", "age": 35, "address": "00037 Bowers Mall Apt. 791\\nHuangfurt, CA 81423"}',
",
       "  'campaigns': [{'cmp_name': 'KTR_20191001_20210104_30-45_B_EUR',
",
       "    'cmp_bgt': 542731,
",
       "    'cmp_spent': 320061,
",
       "    'cmp_clicks': 56556,
",
       "    'cmp_impr': 499998},
",
       "   {'cmp_name': 'KTR_20181119_20190516_40-60_B_EUR',
",
       "    'cmp_bgt': 563162,
",
       "    'cmp_spent': 294891,
",
       "    'cmp_clicks': 66268,
",
       "    'cmp_impr': 500001}]},
",
       " {'user': '{"username": "lstewart", "name": "Julie Harris", "gender": "F", "email": "shelby33@blankenship.com", "age": 83, "address": "83858 Jones Streets Suite 212\\nSarahburgh, SD 84019"}',
",
       "  'campaigns': [{'cmp_name': 'KTR_20190603_20200807_35-45_M_EUR',
",
       "    'cmp_bgt': 354212,
",
       "    'cmp_spent': 30657,
",
       "    'cmp_clicks': 29316,
",
       "    'cmp_impr': 500001},
",
       "   {'cmp_name': 'GRZ_20191119_20210913_40-55_B_GBP',
",
       "    'cmp_bgt': 994364,
",
       "    'cmp_spent': 122783,
",
       "    'cmp_clicks': 63457,
",
       "    'cmp_impr': 499999}]}]"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# fetch simulated rough data
",
    "rough_data = get_data(users)
",
    "
",
    "rough_data[:2]  # let's take a peek"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 238
    },
    "colab_type": "code",
    "id": "Bf6rqL-d5QPx",
    "outputId": "9c0710be-c3e8-4f5b-c8c9-1df0508396ed"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'cmp_name': 'KTR_20191001_20210104_30-45_B_EUR',
",
       "  'cmp_bgt': 542731,
",
       "  'cmp_spent': 320061,
",
       "  'cmp_clicks': 56556,
",
       "  'cmp_impr': 499998,
",
       "  'user': '{"username": "ashley97", "name": "Miranda Cobb", "gender": "F", "email": "victor34@hotmail.com", "age": 35, "address": "00037 Bowers Mall Apt. 791\\nHuangfurt, CA 81423"}'},
",
       " {'cmp_name': 'KTR_20181119_20190516_40-60_B_EUR',
",
       "  'cmp_bgt': 563162,
",
       "  'cmp_spent': 294891,
",
       "  'cmp_clicks': 66268,
",
       "  'cmp_impr': 500001,
",
       "  'user': '{"username": "ashley97", "name": "Miranda Cobb", "gender": "F", "email": "victor34@hotmail.com", "age": 35, "address": "00037 Bowers Mall Apt. 791\\nHuangfurt, CA 81423"}'}]"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Let's start from having a different version of the data
",
    "# I want a list whose items will be dicts. Each dict is 
",
    "# the original campaign dict plus the user JSON
",
    "
",
    "# Build a new dict per campaign instead of calling campaign.update(),
",
    "# so the campaign dicts held by rough_data are not modified by this cell.
",
    "data = [
",
    "    dict(campaign, user=datum['user'])
",
    "    for datum in rough_data
",
    "    for campaign in datum['campaigns']
",
    "]
",
    "data[:2]  # let's take another peek"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "25HN96S65QP0"
   },
   "source": [
    "### Creating the DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 195
    },
    "colab_type": "code",
    "id": "g-te8Uy85QP1",
    "outputId": "82764019-7e45-4384-e30a-136e3468b786"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "
",
       "
",
       "    .dataframe tbody tr th:only-of-type {
",
       "        vertical-align: middle;
",
       "    }
",
       "
",
       "    .dataframe tbody tr th {
",
       "        vertical-align: top;
",
       "    }
",
       "
",
       "    .dataframe thead th {
",
       "        text-align: right;
",
       "    }
",
       "
",
       "
",
       "  
",
       "    
",
       "      
",
       "      cmp_bgt
",
       "      cmp_clicks
",
       "      cmp_impr
",
       "      cmp_name
",
       "      cmp_spent
",
       "      user
",
       "    
",
       "  
",
       "  
",
       "    
",
       "      0
",
       "      542731
",
       "      56556
",
       "      499998
",
       "      KTR_20191001_20210104_30-45_B_EUR
",
       "      320061
",
       "      {"username": "ashley97", "name": "Miranda Cobb...
",
       "    
",
       "    
",
       "      1
",
       "      563162
",
       "      66268
",
       "      500001
",
       "      KTR_20181119_20190516_40-60_B_EUR
",
       "      294891
",
       "      {"username": "ashley97", "name": "Miranda Cobb...
",
       "    
",
       "    
",
       "      2
",
       "      354212
",
       "      29316
",
       "      500001
",
       "      KTR_20190603_20200807_35-45_M_EUR
",
       "      30657
",
       "      {"username": "lstewart", "name": "Julie Harris...
",
       "    
",
       "    
",
       "      3
",
       "      994364
",
       "      63457
",
       "      499999
",
       "      GRZ_20191119_20210913_40-55_B_GBP
",
       "      122783
",
       "      {"username": "lstewart", "name": "Julie Harris...
",
       "    
",
       "    
",
       "      4
",
       "      205775
",
       "      14040
",
       "      499997
",
       "      KTR_20200330_20211027_20-40_F_GBP
",
       "      42387
",
       "      {"username": "sharon85", "name": "Christopher ...
",
       "    
",
       "  
",
       "
",
       ""
      ],
      "text/plain": [
       "   cmp_bgt  cmp_clicks  cmp_impr                           cmp_name  \
",
       "0   542731       56556    499998  KTR_20191001_20210104_30-45_B_EUR   
",
       "1   563162       66268    500001  KTR_20181119_20190516_40-60_B_EUR   
",
       "2   354212       29316    500001  KTR_20190603_20200807_35-45_M_EUR   
",
       "3   994364       63457    499999  GRZ_20191119_20210913_40-55_B_GBP   
",
       "4   205775       14040    499997  KTR_20200330_20211027_20-40_F_GBP   
",
       "
",
       "   cmp_spent                                               user  
",
       "0     320061  {"username": "ashley97", "name": "Miranda Cobb...  
",
       "1     294891  {"username": "ashley97", "name": "Miranda Cobb...  
",
       "2      30657  {"username": "lstewart", "name": "Julie Harris...  
",
       "3     122783  {"username": "lstewart", "name": "Julie Harris...  
",
       "4      42387  {"username": "sharon85", "name": "Christopher ...  "
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# data is good enough for creating an initial DataFrame
",
    "df = DataFrame(data)
",
    "
",
    "# let's take a peek at the first 5 rows, to make sure
",
    "# nothing weird has happened
",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 134
    },
    "colab_type": "code",
    "id": "NXdilDku5QP6",
    "outputId": "f66cb3d5-c03c-408c-d51a-fc55e10cfece"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "cmp_bgt       4920
",
       "cmp_clicks    4920
",
       "cmp_impr      4920
",
       "cmp_name      4920
",
       "cmp_spent     4920
",
       "user          4920
",
       "dtype: int64"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# OK! DataFrame is alive and well!
",
    "# let's get a sense of how many rows there are and what is
",
    "# their structure.
",
    "df.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 284
    },
    "colab_type": "code",
    "id": "1ag3HSMI5QP8",
    "outputId": "fb213f9c-fd7e-491f-8f6f-f3ca4f094926"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "
",
       "
",
       "    .dataframe tbody tr th:only-of-type {
",
       "        vertical-align: middle;
",
       "    }
",
       "
",
       "    .dataframe tbody tr th {
",
       "        vertical-align: top;
",
       "    }
",
       "
",
       "    .dataframe thead th {
",
       "        text-align: right;
",
       "    }
",
       "
",
       "
",
       "  
",
       "    
",
       "      
",
       "      cmp_bgt
",
       "      cmp_clicks
",
       "      cmp_impr
",
       "      cmp_spent
",
       "    
",
       "  
",
       "  
",
       "    
",
       "      count
",
       "      4920.000000
",
       "      4920.000000
",
       "      4920.000000
",
       "      4920.000000
",
       "    
",
       "    
",
       "      mean
",
       "      504930.672358
",
       "      40222.850203
",
       "      499999.458943
",
       "      250462.501016
",
       "    
",
       "    
",
       "      std
",
       "      286093.538484
",
       "      21672.391822
",
       "      2.026843
",
       "      218451.424805
",
       "    
",
       "    
",
       "      min
",
       "      1322.000000
",
       "      701.000000
",
       "      499992.000000
",
       "      143.000000
",
       "    
",
       "    
",
       "      25%
",
       "      255732.500000
",
       "      22224.500000
",
       "      499998.000000
",
       "      70305.500000
",
       "    
",
       "    
",
       "      50%
",
       "      504126.000000
",
       "      36854.500000
",
       "      499999.000000
",
       "      189206.000000
",
       "    
",
       "    
",
       "      75%
",
       "      753168.250000
",
       "      55770.250000
",
       "      500001.000000
",
       "      386425.750000
",
       "    
",
       "    
",
       "      max
",
       "      999870.000000
",
       "      98858.000000
",
       "      500007.000000
",
       "      981524.000000
",
       "    
",
       "  
",
       "
",
       ""
      ],
      "text/plain": [
       "             cmp_bgt    cmp_clicks       cmp_impr      cmp_spent
",
       "count    4920.000000   4920.000000    4920.000000    4920.000000
",
       "mean   504930.672358  40222.850203  499999.458943  250462.501016
",
       "std    286093.538484  21672.391822       2.026843  218451.424805
",
       "min      1322.000000    701.000000  499992.000000     143.000000
",
       "25%    255732.500000  22224.500000  499998.000000   70305.500000
",
       "50%    504126.000000  36854.500000  499999.000000  189206.000000
",
       "75%    753168.250000  55770.250000  500001.000000  386425.750000
",
       "max    999870.000000  98858.000000  500007.000000  981524.000000"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 136
    },
    "colab_type": "code",
    "id": "7DcOF6xY5QP_",
    "outputId": "adc4952e-453b-431c-8f71-bb9eab346bb5"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "
",
       "
",
       "    .dataframe tbody tr th:only-of-type {
",
       "        vertical-align: middle;
",
       "    }
",
       "
",
       "    .dataframe tbody tr th {
",
       "        vertical-align: top;
",
       "    }
",
       "
",
       "    .dataframe thead th {
",
       "        text-align: right;
",
       "    }
",
       "
",
       "
",
       "  
",
       "    
",
       "      
",
       "      cmp_bgt
",
       "      cmp_clicks
",
       "      cmp_impr
",
       "      cmp_name
",
       "      cmp_spent
",
       "      user
",
       "    
",
       "  
",
       "  
",
       "    
",
       "      2922
",
       "      999870
",
       "      40078
",
       "      500000
",
       "      KTR_20180522_20181120_25-40_M_GBP
",
       "      76771
",
       "      {"username": "christine59", "name": "Stephanie...
",
       "    
",
       "    
",
       "      1765
",
       "      999775
",
       "      34657
",
       "      499998
",
       "      BYU_20200125_20201213_30-35_F_USD
",
       "      571672
",
       "      {"username": "yhowell", "name": "Jonathan Loga...
",
       "    
",
       "    
",
       "      178
",
       "      999756
",
       "      36870
",
       "      500000
",
       "      AKX_20180630_20190615_20-25_B_EUR
",
       "      262324
",
       "      {"username": "scott93", "name": "Nicholas Vill...
",
       "    
",
       "  
",
       "
",
       ""
      ],
      "text/plain": [
       "      cmp_bgt  cmp_clicks  cmp_impr                           cmp_name  \
",
       "2922   999870       40078    500000  KTR_20180522_20181120_25-40_M_GBP   
",
       "1765   999775       34657    499998  BYU_20200125_20201213_30-35_F_USD   
",
       "178    999756       36870    500000  AKX_20180630_20190615_20-25_B_EUR   
",
       "
",
       "      cmp_spent                                               user  
",
       "2922      76771  {"username": "christine59", "name": "Stephanie...  
",
       "1765     571672  {"username": "yhowell", "name": "Jonathan Loga...  
",
       "178      262324  {"username": "scott93", "name": "Nicholas Vill...  "
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# let's see which are the top and bottom 3 campaigns according
",
    "# to budget (regardless of the currency)
",
    "df.sort_values(by=['cmp_bgt'], ascending=False).head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 136
    },
    "colab_type": "code",
    "id": "h9WFkFnz5QQC",
    "outputId": "5e366b8d-50a9-4ce3-a5b4-ff5ff272a0dc"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "
",
       "
",
       "    .dataframe tbody tr th:only-of-type {
",
       "        vertical-align: middle;
",
       "    }
",
       "
",
       "    .dataframe tbody tr th {
",
       "        vertical-align: top;
",
       "    }
",
       "
",
       "    .dataframe thead th {
",
       "        text-align: right;
",
       "    }
",
       "
",
       "
",
       "  
",
       "    
",
       "      
",
       "      cmp_bgt
",
       "      cmp_clicks
",
       "      cmp_impr
",
       "      cmp_name
",
       "      cmp_spent
",
       "      user
",
       "    
",
       "  
",
       "  
",
       "    
",
       "      4230
",
       "      1755
",
       "      44369
",
       "      500002
",
       "      GRZ_20190518_20210124_30-40_M_USD
",
       "      1430
",
       "      {"username": "mejiachristine", "name": "Megan ...
",
       "    
",
       "    
",
       "      2321
",
       "      1607
",
       "      57234
",
       "      500000
",
       "      AKX_20190824_20200218_35-45_B_EUR
",
       "      1192
",
       "      {"username": "zreynolds", "name": "Michael Mor...
",
       "    
",
       "    
",
       "      1433
",
       "      1322
",
       "      11909
",
       "      500002
",
       "      GRZ_20181012_20200705_30-45_F_GBP
",
       "      243
",
       "      {"username": "brownbrooke", "name": "Michelle ...
",
       "    
",
       "  
",
       "
",
       ""
      ],
      "text/plain": [
       "      cmp_bgt  cmp_clicks  cmp_impr                           cmp_name  \
",
       "4230     1755       44369    500002  GRZ_20190518_20210124_30-40_M_USD   
",
       "2321     1607       57234    500000  AKX_20190824_20200218_35-45_B_EUR   
",
       "1433     1322       11909    500002  GRZ_20181012_20200705_30-45_F_GBP   
",
       "
",
       "      cmp_spent                                               user  
",
       "4230       1430  {"username": "mejiachristine", "name": "Megan ...  
",
       "2321       1192  {"username": "zreynolds", "name": "Michael Mor...  
",
       "1433        243  {"username": "brownbrooke", "name": "Michelle ...  "
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sort_values(by=['cmp_bgt'], ascending=False).tail(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "TcdxS56G5QQE"
   },
   "source": [
    "## 3 - Manipulating the DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 136
    },
    "colab_type": "code",
    "id": "dEr7VsTU5QQH",
    "outputId": "b1cff3fd-e9b7-4667-df89-31049582d56d"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "
",
       "
",
       "    .dataframe tbody tr th:only-of-type {
",
       "        vertical-align: middle;
",
       "    }
",
       "
",
       "    .dataframe tbody tr th {
",
       "        vertical-align: top;
",
       "    }
",
       "
",
       "    .dataframe thead th {
",
       "        text-align: right;
",
       "    }
",
       "
",
       "
",
       "  
",
       "    
",
       "      
",
       "      Type
",
       "      Start
",
       "      End
",
       "      Age
",
       "      Gender
",
       "      Currency
",
       "    
",
       "  
",
       "  
",
       "    
",
       "      0
",
       "      KTR
",
       "      01-10-2019
",
       "      04-01-2021
",
       "      30-45
",
       "      B
",
       "      EUR
",
       "    
",
       "    
",
       "      1
",
       "      KTR
",
       "      11-19-2018
",
       "      05-16-2019
",
       "      40-60
",
       "      B
",
       "      EUR
",
       "    
",
       "    
",
       "      2
",
       "      KTR
",
       "      03-06-2019
",
       "      07-08-2020
",
       "      35-45
",
       "      M
",
       "      EUR
",
       "    
",
       "  
",
       "
",
       ""
      ],
      "text/plain": [
       "  Type       Start         End    Age Gender Currency
",
       "0  KTR  01-10-2019  04-01-2021  30-45      B      EUR
",
       "1  KTR  11-19-2018  05-16-2019  40-60      B      EUR
",
       "2  KTR  03-06-2019  07-08-2020  35-45      M      EUR"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# first, let's explode cmp_name into its components
",
    "# and get a separate DataFrame for those
",
    "
",
    "def unpack_campaign_name(name):
",
    "    # very optimistic method, assumes data in campaign name
",
    "    # is always in good state
",
    "    type_, start, end, age, gender, currency = name.split('_')
",
    "    # Dates are encoded as YYYYMMDD, so parse them with that exact format.
",
    "    # delorean's parse() reads ambiguous dates day-first by default, which
",
    "    # turned '20191001' into 10 January 2019 instead of 1 October 2019
",
    "    # and could place End before Start (a negative campaign duration).
",
    "    start = datetime.strptime(start, '%Y%m%d').date()
",
    "    end = datetime.strptime(end, '%Y%m%d').date()
",
    "    return type_, start, end, age, gender, currency
",
    "
",
    "# unpack every campaign name into a tuple, then build a DataFrame with
",
    "# one column per component, aligned on the index of df
",
    "campaign_data = df['cmp_name'].apply(unpack_campaign_name)
",
    "campaign_cols = [
",
    "    'Type', 'Start', 'End', 'Age', 'Gender', 'Currency']
",
    "campaign_df = DataFrame(
",
    "    campaign_data.tolist(), columns=campaign_cols, index=df.index)
",
    "
",
    "# Show Start and End in the US date format (month-day-year).
",
    "# strftime('%m-%d-%Y') returns a string such as '11-19-2018', so from
",
    "# here on both columns hold strings rather than date objects.
",
    "
",
    "campaign_df['Start'] = campaign_df['Start'].apply(lambda x: x.strftime('%m-%d-%Y'))  # US date format
",
    "campaign_df['End'] = campaign_df['End'].apply(lambda x: x.strftime('%m-%d-%Y'))  # US date format
",
    "
",
    "campaign_df.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "T0wUrDcq5QQL"
   },
   "outputs": [],
   "source": [
    "# let's join the two dataframes
",
    "df = df.join(campaign_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 136
    },
    "colab_type": "code",
    "id": "bdyO8URC5QQP",
    "outputId": "59b375d8-91e6-497c-a17e-86952f0151d1"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "
",
       "
",
       "    .dataframe tbody tr th:only-of-type {
",
       "        vertical-align: middle;
",
       "    }
",
       "
",
       "    .dataframe tbody tr th {
",
       "        vertical-align: top;
",
       "    }
",
       "
",
       "    .dataframe thead th {
",
       "        text-align: right;
",
       "    }
",
       "
",
       "
",
       "  
",
       "    
",
       "      
",
       "      cmp_name
",
       "      Type
",
       "      Start
",
       "      End
",
       "      Age
",
       "      Gender
",
       "      Currency
",
       "    
",
       "  
",
       "  
",
       "    
",
       "      0
",
       "      KTR_20191001_20210104_30-45_B_EUR
",
       "      KTR
",
       "      01-10-2019
",
       "      04-01-2021
",
       "      30-45
",
       "      B
",
       "      EUR
",
       "    
",
       "    
",
       "      1
",
       "      KTR_20181119_20190516_40-60_B_EUR
",
       "      KTR
",
       "      11-19-2018
",
       "      05-16-2019
",
       "      40-60
",
       "      B
",
       "      EUR
",
       "    
",
       "    
",
       "      2
",
       "      KTR_20190603_20200807_35-45_M_EUR
",
       "      KTR
",
       "      03-06-2019
",
       "      07-08-2020
",
       "      35-45
",
       "      M
",
       "      EUR
",
       "    
",
       "  
",
       "
",
       ""
      ],
      "text/plain": [
       "                            cmp_name Type       Start         End    Age  \
",
       "0  KTR_20191001_20210104_30-45_B_EUR  KTR  01-10-2019  04-01-2021  30-45   
",
       "1  KTR_20181119_20190516_40-60_B_EUR  KTR  11-19-2018  05-16-2019  40-60   
",
       "2  KTR_20190603_20200807_35-45_M_EUR  KTR  03-06-2019  07-08-2020  35-45   
",
       "
",
       "  Gender Currency  
",
       "0      B      EUR  
",
       "1      B      EUR  
",
       "2      M      EUR  "
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# and take a peek: good! We didn't screw up!
",
    "df[['cmp_name'] + campaign_cols].head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "psb4jLFl5QQS"
   },
   "outputs": [],
   "source": [
    "# now let's do the same for the JSON user object
",
    "
",
    "def unpack_user_json(user):
",
    "    # Decode the user JSON string and return its values as a list in the
",
    "    # order of user_cols. Optimistic: a missing attribute raises KeyError.
",
    "    fields = ('username', 'email', 'name', 'gender', 'age', 'address')
",
    "    record = json.loads(user.strip())
",
    "    return [record[field] for field in fields]
",
    "
",
    "user_data = df['user'].apply(unpack_user_json)
",
    "user_cols = [
",
    "    'username', 'email', 'name', 'gender', 'age', 'address']
",
    "user_df = DataFrame(
",
    "    user_data.tolist(), columns=user_cols, index=df.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "Qztu_l7j5QQV"
   },
   "outputs": [],
   "source": [
    "# let's join the two dataframes
",
    "df = df.join(user_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 106
    },
    "colab_type": "code",
    "id": "aLndxgKp5QQZ",
    "outputId": "41672907-6159-4554-cf71-ce140dfc0fa4"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "
",
       "
",
       "    .dataframe tbody tr th:only-of-type {
",
       "        vertical-align: middle;
",
       "    }
",
       "
",
       "    .dataframe tbody tr th {
",
       "        vertical-align: top;
",
       "    }
",
       "
",
       "    .dataframe thead th {
",
       "        text-align: right;
",
       "    }
",
       "
",
       "
",
       "  
",
       "    
",
       "      
",
       "      user
",
       "      username
",
       "      email
",
       "      name
",
       "      gender
",
       "      age
",
       "      address
",
       "    
",
       "  
",
       "  
",
       "    
",
       "      0
",
       "      {"username": "ashley97", "name": "Miranda Cobb...
",
       "      ashley97
",
       "      victor34@hotmail.com
",
       "      Miranda Cobb
",
       "      F
",
       "      35
",
       "      00037 Bowers Mall Apt. 791\nHuangfurt, CA 81423
",
       "    
",
       "    
",
       "      1
",
       "      {"username": "ashley97", "name": "Miranda Cobb...
",
       "      ashley97
",
       "      victor34@hotmail.com
",
       "      Miranda Cobb
",
       "      F
",
       "      35
",
       "      00037 Bowers Mall Apt. 791\nHuangfurt, CA 81423
",
       "    
",
       "  
",
       "
",
       ""
      ],
      "text/plain": [
       "                                                user  username  \
",
       "0  {"username": "ashley97", "name": "Miranda Cobb...  ashley97   
",
       "1  {"username": "ashley97", "name": "Miranda Cobb...  ashley97   
",
       "
",
       "                  email          name gender  age  \
",
       "0  victor34@hotmail.com  Miranda Cobb      F   35   
",
       "1  victor34@hotmail.com  Miranda Cobb      F   35   
",
       "
",
       "                                           address  
",
       "0  00037 Bowers Mall Apt. 791\nHuangfurt, CA 81423  
",
       "1  00037 Bowers Mall Apt. 791\nHuangfurt, CA 81423  "
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# and take a peek: good! Also this time we didn't screw up!
",
    "df[['user'] + user_cols].head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "RgZPZq7x5QQb"
   },
   "outputs": [],
   "source": [
    "# The DataFrame is fully expanded now, so give its columns readable
",
    "# names before working with it. Labels are assigned by position, so
",
    "# this list has to follow df's current column order.
",
    "better_columns = [
",
    "    # leading columns (not part of campaign_cols / user_cols)
",
    "    'Budget', 'Clicks', 'Impressions', 'cmp_name', 'Spent', 'user',
",
    "    # the campaign_cols fields
",
    "    'Type', 'Start', 'End', 'Target Age', 'Target Gender', 'Currency',
",
    "    # the user_cols fields
",
    "    'Username', 'Email', 'Name', 'Gender', 'Age', 'Address',
",
    "]
",
    "df.columns = better_columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "Pj7kk6YZ5QQd"
   },
   "outputs": [],
   "source": [
    "# let's add three other columns
",
    "
",
    "def calculate_extra_columns(df):
",
    "    # Add the CTR, CPC and CPI ratio columns to df in place.
",
    "    # Nothing is returned; the caller's DataFrame is modified directly.
",
    "    clicks = df['Clicks']
",
    "    impressions = df['Impressions']
",
    "    spent = df['Spent']
",
    "    df['CTR'] = clicks / impressions  # Click Through Rate
",
    "    df['CPC'] = spent / clicks  # Cost Per Click
",
    "    df['CPI'] = spent / impressions  # Cost Per Impression
",
    "
",
    "calculate_extra_columns(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 136
    },
    "colab_type": "code",
    "id": "2kjY_m5g5QQf",
    "outputId": "d9b93b70-9434-4c53-ff3c-36a1b443d0a0"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "
",
       "
",
       "    .dataframe tbody tr th:only-of-type {
",
       "        vertical-align: middle;
",
       "    }
",
       "
",
       "    .dataframe tbody tr th {
",
       "        vertical-align: top;
",
       "    }
",
       "
",
       "    .dataframe thead th {
",
       "        text-align: right;
",
       "    }
",
       "
",
       "
",
       "  
",
       "    
",
       "      
",
       "      Spent
",
       "      Clicks
",
       "      Impressions
",
       "      CTR
",
       "      CPC
",
       "      CPI
",
       "    
",
       "  
",
       "  
",
       "    
",
       "      0
",
       "      320061
",
       "      56556
",
       "      499998
",
       "      0.113112
",
       "      5.659187
",
       "      0.640125
",
       "    
",
       "    
",
       "      1
",
       "      294891
",
       "      66268
",
       "      500001
",
       "      0.132536
",
       "      4.449976
",
       "      0.589781
",
       "    
",
       "    
",
       "      2
",
       "      30657
",
       "      29316
",
       "      500001
",
       "      0.058632
",
       "      1.045743
",
       "      0.061314
",
       "    
",
       "  
",
       "
",
       ""
      ],
      "text/plain": [
       "    Spent  Clicks  Impressions       CTR       CPC       CPI
",
       "0  320061   56556       499998  0.113112  5.659187  0.640125
",
       "1  294891   66268       500001  0.132536  4.449976  0.589781
",
       "2   30657   29316       500001  0.058632  1.045743  0.061314"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# let's take a peek
",
    "df[['Spent', 'Clicks', 'Impressions',
",
    "    'CTR', 'CPC', 'CPI']].head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 67
    },
    "colab_type": "code",
    "id": "oz9Kzare5QQi",
    "outputId": "4f68aba0-0982-480d-8ab5-8a557a86dd71"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CTR: 0.1131124524498098 0.1131124524498098
",
      "CPC: 5.659187354126883 5.659187354126883
",
      "CPI: 0.640124560498242 0.640124560498242
"
     ]
    }
   ],
   "source": [
    "# Spot-check the first row: each line prints the stored metric followed
",
    "# by the same ratio recomputed by hand, so the two numbers should match.
",
    "clicks, impressions, spent = (
",
    "    df[column][0] for column in ('Clicks', 'Impressions', 'Spent'))
",
    "CTR, CPC, CPI = (df[metric][0] for metric in ('CTR', 'CPC', 'CPI'))
",
    "
",
    "print('CTR:', CTR, clicks / impressions)
",
    "print('CPC:', CPC, spent / clicks)
",
    "print('CPI:', CPI, spent / impressions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "DA3dgME75QQl"
   },
   "outputs": [],
   "source": [
    "# let's also add the name of the Day when campaign starts
",
    "def get_day_of_the_week(day):
",
    "    # Return the weekday name (e.g. 'Monday') for a date string.
",
    "    #
",
    "    # day: a date written as 'mm-dd-yyyy' (US order), e.g. '11-19-2018'.
",
    "    # Returns the matching entry of calendar.day_name.
",
    "    # pd.to_datetime raises ValueError if the string does not fit the format.
",
    "    #
",
    "    # NOTE(review): the sample rows displayed earlier show Start values such
",
    "    # as 03-06-2019 for campaign KTR_20190603_..., so some strings may be
",
    "    # day-first - confirm the upstream formatting before trusting the result.
",
    "    parsed = pd.to_datetime(day, format='%m-%d-%Y')
",
    "    # calendar.day_name is indexed 0 (Monday) .. 6 (Sunday), which is exactly
",
    "    # what weekday() returns, so no per-call lookup table has to be built.
",
    "    return calendar.day_name[parsed.weekday()]
",
    "
",
    "def get_duration(row):
",
    "    #To get difference between start and end, we will convert start and end date to date value first because we changed it
",
    "    #to string in the cell above. To do so, we will use pandas function to_datetime and will change it into date format
",
    "    #The string format of the date is in format 'mm-dd-yyyy', so to parse it to date format, we will add format value 
",
    "    #and assign it to '%m-%d-%Y'
",
    "    
",
    "    start = pd.to_datetime(row['Start'], format = '%m-%d-%Y') #Convert start value to date format
",
    "    end = pd.to_datetime(row['End'], format = '%m-%d-%Y') #Convert end value to date format
",
    "    
",
    "    dur = (end - start).days #Get the difference between end and start time
",
    "    
",
    "    if dur 
",
       "
",
       "    .dataframe tbody tr th:only-of-type {
",
       "        vertical-align: middle;
",
       "    }
",
       "
",
       "    .dataframe tbody tr th {
",
       "        vertical-align: top;
",
       "    }
",
       "
",
       "    .dataframe thead th {
",
       "        text-align: right;
",
       "    }
",
       "
",
       "
",
       "  
",
       "    
",
       "      
",
       "      Start
",
       "      End
",
       "      Duration
",
       "      Day of Week
",
       "    
",
       "  
",
       "  
",
       "    
",
       "      0
",
       "      01-10-2019
",
       "      04-01-2021
",
       "      812
",
       "      Thursday
",
       "    
",
       "    
",
       "      1
",
       "      11-19-2018
",
       "      05-16-2019
",
       "      178
",
       "      Monday
",
       "    
",
       "    
",
       "      2
",
       "      03-06-2019
",
       "      07-08-2020
",
       "      490
",
       "      Wednesday
",
       "    
",
       "  
",
       "
",
       ""
      ],
      "text/plain": [
       "        Start         End  Duration Day of Week
",
       "0  01-10-2019  04-01-2021       812    Thursday
",
       "1  11-19-2018  05-16-2019       178      Monday
",
       "2  03-06-2019  07-08-2020       490   Wednesday"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# let's verify
",
    "df[['Start', 'End', 'Duration', 'Day of Week']].head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "rDpWyScc5QQs"
   },
   "outputs": [],
   "source": [
    "# now let's get rid of the cmp_name and user columns,
",
    "# which we don't need any more, and address too
",
    "final_columns = [
",
    "    'Type', 'Start', 'End', 'Duration', 'Day of Week', 'Budget',
",
    "    'Currency', 'Clicks', 'Impressions', 'Spent', 'CTR', 'CPC',
",
    "    'CPI', 'Target Age', 'Target Gender', 'Username', 'Email',
",
    "    'Name', 'Gender', 'Age'
",
    "]
",
    "df = df[final_columns]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "AF0Xw7Ih5QQu"
   },
   "source": [
    "## 4 - Saving to a file in different formats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "FbA-2X085QQu"
   },
   "outputs": [],
   "source": [
    "# CSV format
",
    "# df.to_csv('df.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "Jm1btUom5QQz"
   },
   "outputs": [],
   "source": [
    "# JSON format
",
    "# df.to_json('df.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "SVZ8Ez2Y5QQ6"
   },
   "outputs": [],
   "source": [
    "# Spreadsheet format
",
    "# df.to_excel('df.xls')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "YUAkFPfh5QRB"
   },
   "source": [
    "## 5 - Visualizing results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "9QpobLjT5QRD"
   },
   "source": [
    "First, let's take care of the graphics: we need to instruct the notebook to render matplotlib figures inline and to use the serif font family."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "CZMinjG95QRE"
   },
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "Pzwj7xCl5QRG"
   },
   "outputs": [],
   "source": [
    "import pylab
",
    "pylab.rcParams.update({'font.family' : 'serif'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 284
    },
    "colab_type": "code",
    "id": "mYxt9wqm5QRL",
    "outputId": "70b7241b-68eb-4d49-cd3c-61c7eaf97fdb"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "
",
       "
",
       "    .dataframe tbody tr th:only-of-type {
",
       "        vertical-align: middle;
",
       "    }
",
       "
",
       "    .dataframe tbody tr th {
",
       "        vertical-align: top;
",
       "    }
",
       "
",
       "    .dataframe thead th {
",
       "        text-align: right;
",
       "    }
",
       "
",
       "
",
       "  
",
       "    
",
       "      
",
       "      Duration
",
       "      Budget
",
       "      Clicks
",
       "      Impressions
",
       "      Spent
",
       "      CTR
",
       "      CPC
",
       "      CPI
",
       "      Age
",
       "    
",
       "  
",
       "  
",
       "    
",
       "      count
",
       "      4920.000000
",
       "      4920.000000
",
       "      4920.000000
",
       "      4920.000000
",
       "      4920.000000
",
       "      4920.000000
",
       "      4920.000000
",
       "      4920.000000
",
       "      4920.000000
",
       "    
",
       "    
",
       "      mean
",
       "      383.604268
",
       "      504930.672358
",
       "      40222.850203
",
       "      499999.458943
",
       "      250462.501016
",
       "      0.080446
",
       "      9.862352
",
       "      0.500926
",
       "      52.943089
",
       "    
",
       "    
",
       "      std
",
       "      230.213040
",
       "      286093.538484
",
       "      21672.391822
",
       "      2.026843
",
       "      218451.424805
",
       "      0.043345
",
       "      18.669847
",
       "      0.436903
",
       "      21.061735
",
       "    
",
       "    
",
       "      min
",
       "      1.000000
",
       "      1322.000000
",
       "      701.000000
",
       "      499992.000000
",
       "      143.000000
",
       "      0.001402
",
       "      0.002627
",
       "      0.000286
",
       "      18.000000
",
       "    
",
       "    
",
       "      25%
",
       "      189.000000
",
       "      255732.500000
",
       "      22224.500000
",
       "      499998.000000
",
       "      70305.500000
",
       "      0.044449
",
       "      1.823865
",
       "      0.140611
",
       "      34.000000
",
       "    
",
       "    
",
       "      50%
",
       "      372.000000
",
       "      504126.000000
",
       "      36854.500000
",
       "      499999.000000
",
       "      189206.000000
",
       "      0.073709
",
       "      5.180995
",
       "      0.378414
",
       "      52.000000
",
       "    
",
       "    
",
       "      75%
",
       "      561.000000
",
       "      753168.250000
",
       "      55770.250000
",
       "      500001.000000
",
       "      386425.750000
",
       "      0.111541
",
       "      11.586214
",
       "      0.772860
",
       "      71.000000
",
       "    
",
       "    
",
       "      max
",
       "      1064.000000
",
       "      999870.000000
",
       "      98858.000000
",
       "      500007.000000
",
       "      981524.000000
",
       "      0.197716
",
       "      614.512590
",
       "      1.963056
",
       "      90.000000
",
       "    
",
       "  
",
       "
",
       ""
      ],
      "text/plain": [
       "          Duration         Budget        Clicks    Impressions          Spent  \
",
       "count  4920.000000    4920.000000   4920.000000    4920.000000    4920.000000   
",
       "mean    383.604268  504930.672358  40222.850203  499999.458943  250462.501016   
",
       "std     230.213040  286093.538484  21672.391822       2.026843  218451.424805   
",
       "min       1.000000    1322.000000    701.000000  499992.000000     143.000000   
",
       "25%     189.000000  255732.500000  22224.500000  499998.000000   70305.500000   
",
       "50%     372.000000  504126.000000  36854.500000  499999.000000  189206.000000   
",
       "75%     561.000000  753168.250000  55770.250000  500001.000000  386425.750000   
",
       "max    1064.000000  999870.000000  98858.000000  500007.000000  981524.000000   
",
       "
",
       "               CTR          CPC          CPI          Age  
",
       "count  4920.000000  4920.000000  4920.000000  4920.000000  
",
       "mean      0.080446     9.862352     0.500926    52.943089  
",
       "std       0.043345    18.669847     0.436903    21.061735  
",
       "min       0.001402     0.002627     0.000286    18.000000  
",
       "25%       0.044449     1.823865     0.140611    34.000000  
",
       "50%       0.073709     5.180995     0.378414    52.000000  
",
       "75%       0.111541    11.586214     0.772860    71.000000  
",
       "max       0.197716   614.512590     1.963056    90.000000  "
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 402
    },
    "colab_type": "code",
    "id": "vg3UZLDj5QRM",
    "outputId": "334be30d-b83c-48ad-e7af-61ab5cf34449"
   },
   "outputs": [
    {
     "data": {
      "image/png":

We have identified a number of improvements to the Ad Campaign case study on 5/8. Please document in the first cell of the notebook how you suggest to make these improvements and exactly what cells...

Solution