45265.ipyn
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"CINDY HERRERA DSC550 WEEK 5"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Applied Text Analysis With Python Exercises"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import string\n",
"import re\n",
"import matplotlib.pyplot as plt\n",
"from collections import Counter"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#Step 1: Load data into a dataframe"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"addr1 = \"articles1.csv\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step 2: check the dimension of the table/look at the data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The dimension of the table is: (50000, 10)\n"
]
},
{
"data": {
"text/html": [
"
\n",
"