#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Sep 25 15:09:01 2021
@author: Moha
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
'''
he said use matrix operation not loops
'''
# Load the housing data; the path is resolved relative to the working directory.
dataset = pd.read_csv('king_county.csv')
'''
(f) Several features are listed as numerical but have only a small number of discrete values. This includes
"bedrooms", "bathrooms" and "floors". For each of these three features, identify the unique values
that are observed (you can use the unique function from Pandas). For each of the 3 features, generate a
boxplot (you can use the boxplot function from Pandas) of the price. Specifically, you should generate
one plot for each feature grouped by the feature value.
'''
'''
use full matrix operation whenever possible rather than using for loops.
'''
# Distinct observed values of each discrete-valued feature.
bed = dataset.bedrooms.unique()
bath = dataset.bathrooms.unique()
floor = dataset.floors.unique()
#print(bed)
#print(bath)
#print(floor)
#print(pd.unique(dataset['bedrooms']))
# One price boxplot per feature, grouped by that feature's values.
# `by=` does the grouping on the full-length columns, so the unique-value
# arrays above are not needed (and must not be zipped with `price`).
dataset.boxplot(by='bedrooms', column=['price'], grid=False)
dataset.boxplot(by='bathrooms', column=['price'], grid=False)
dataset.boxplot(by='floors', column=['price'], grid=False)
# I think I have to plot it according to the unique part
# and when I do that it gives me an error because the price length is greater
# than the unique length
'''
df = pd.DataFrame(list(zip(bedrooms_unique, bathrooms_unique,floors_unique,price_unique)), columns =['bedrooms', 'bath','flor','price'])
print (df)
df.boxplot(by = 'bedrooms', column =['price'], grid = False)
'''
'''
(g) Consider the following numerical features: sqft_living, sqft_lot, sqft_living15, sqft_lot15.
Calculate the co-variance matrix of these four features. Generate a scatter plot of the data using
sqft_living and sqft_living15, and another scatter plot using sqft_lot with sqft_lot15.
Question:
what do you observe from the scatter plot?
Are these features redundant?
'''
# Select the four square-footage features; .copy() gives an independent frame
# so later changes to `df` cannot touch `dataset`.
df = dataset[['sqft_living', 'sqft_lot', 'sqft_living15', 'sqft_lot15']].copy()
print (df)
# Pairwise covariance matrix (4x4) of the selected features.
covMat = df.cov()
print (covMat)
# Scatter the observations themselves, one figure per requested feature pair.
# The covariance matrix is a summary statistic, not data points, so it is
# printed above rather than scatter-plotted.
df.plot.scatter(x='sqft_living', y='sqft_living15', alpha=0.2)
df.plot.scatter(x='sqft_lot', y='sqft_lot15', alpha=0.2)
# NOTE(review): when run as a plain script (non-interactive backend) the
# figures presumably need an explicit plt.show() to appear - confirm.
id,date,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,price
XXXXXXXXXX,7/9/2014,4,2.5,2720,10006,2,0,0,3,9,2720,0,1989,0,98074,47.6295, XXXXXXXXXX,2720,10759,5.9495
XXXXXXXXXX,7/18/2014,2,2.5,2600,5000,1,0,0,5,8,1300,1300,1926,0,98126,47.5806, XXXXXXXXXX,2260,5000,6.65
XXXXXXXXXX,7/7/2014,5,1.75,1650,3000,1.5,0,0,3,8,1650,0,1902,0,98144,47.5955, XXXXXXXXXX,1740,4000,4.43
XXXXXXXXXX,4/28/2015,4,1.75,1720,9600,1,0,0,4,8,1720,0,1969,0,98059,47.4764, XXXXXXXXXX,1660,10720,3.8
XXXXXXXXXX,9/4/2014,6,2.25,3830,11180,1,0,2,5,9,2440,1390,1962,0,98008,47.5849, XXXXXXXXXX,2500,10400,8.87
XXXXXXXXXX,2/24/2015,4,2.5,2210,213008,1,0,0,4,7,1210,1000,1975,0,98038,47.4039,-121.98,2270,52707,4.15
XXXXXXXXXX,3/29/2015,3,2.5,2600,23361,1.5,1,4,3,8,2150,450,1912,0,98146,47.4997, XXXXXXXXXX,1700,14700,5.4
XXXXXXXXXX,11/13/2014,4,1.5,1220,4900,1,0,0,3,6,1220,0,1942,0,98118,47.5292, XXXXXXXXXX,1410,3000,2.5
XXXXXXXXXX,8/13/2014,3,2.75,3040,24192,2,0,0,4,10,3040,0,1987,0,98034,47.7108, XXXXXXXXXX,2770,5728,8.25
XXXXXXXXXX,5/4/2015,2,1,1010,5408,1,0,0,4,6,1010,0,1926,0,98032,47.3759, XXXXXXXXXX,980,7800,2.25
XXXXXXXXXX,12/8/2014,4,2.5,2660,4082,2,0,0,3,7,2660,0,2010,0,98042,47.3414, XXXXXXXXXX,2390,4876,2.87
XXXXXXXXXX,6/30/2014,4,2.5,2680,12215,1,1,4,3,9,1590,1090,1956,0,98166,47.4396, XXXXXXXXXX,2960,19964,13.1
XXXXXXXXXX,4/23/2015,3,2,1640,9825,1,0,0,4,7,1090,550,1971,0,98034,47.7244,-122.2,1500,9750,4.55
XXXXXXXXXX,7/21/2014,4,2.25,2470,17008,2,0,0,4,8,2470,0,1979,0,98075,47.5924, XXXXXXXXXX,2470,31798,6.25
XXXXXXXXXX,7/10/2014,4,2.5,1830,3868,2,0,0,3,7,1830,0,2007,0,98118,47.5186, XXXXXXXXXX,2330,3868,2.75
XXXXXXXXXX,12/8/2014,3,2.5,1860,5321,2,0,0,3,7,1860,0,2000,0,98038,47.3848, XXXXXXXXXX,1940,5205,3.26995
XXXXXXXXXX,5/12/2014,4,2.25,1800,8623,1,0,0,4,8,1360,440,1980,0,98033,47.692, XXXXXXXXXX,2370,8623,5.22
XXXXXXXXXX,10/2/2014,1,1,500,7440,1,0,0,1,5,500,0,1928,0,98106,47.5252, XXXXXXXXXX,1350,7440,1.25
XXXXXXXXXX,2/6/2015,2,1,770,5680,1,0,0,4,6,770,0,1929,0,98117,47.6951, XXXXXXXXXX,1170,5514,3.65
XXXXXXXXXX,6/17/2014,3,2.5,2970,21907,2,0,0,3,9,2970,0,1998,2006,98059,47.4741, XXXXXXXXXX,2040,27917,4.995
XXXXXXXXXX,3/18/2015,3,1.5,1480,7117,1,0,0,3,7,1170,310,1960,0,98028,47.7766, XXXXXXXXXX,2230,14775,2.85
XXXXXXXXXX,7/24/2014,2,1.5,1180,1034,2,0,0,3,7,1120,60,2001,0,98115,47.678, XXXXXXXXXX,1137,1034,4.1
XXXXXXXXXX,7/7/2014,3,2.5,1990,12793,2,0,0,3,8,1990,0,1993,0,98028,47.7347, XXXXXXXXXX,2290,9035,4.5
XXXXXXXXXX,12/8/2014,4,2.5,3220,6399,2,0,0,3,9,3220,0,2004,0,98075,47.5883, XXXXXXXXXX,2850,6399,6.97
XXXXXXXXXX,11/18/2014,3,1,1370,17859,1,0,0,4,7,1150,220,1930,0,98024,47.5617, XXXXXXXXXX,1460,47044,2.75
XXXXXXXXXX,10/28/2014,3,1,1570,2280,2,0,0,3,7,1570,0,1922,0,98119,47.6413, XXXXXXXXXX,1580,2640,6.85
XXXXXXXXXX,2/18/2015,3,2.5,1830,6807,2.5,0,0,5,7,1830,0,1954,0,98155,47.7613, XXXXXXXXXX,1340,6807,4.4
XXXXXXXXXX,9/11/2014,4,2.25,2150,27345,2,0,0,5,8,2150,0,1976,0,98059,47.469, XXXXXXXXXX,2200,11923,4.1
XXXXXXXXXX,1/26/2015,4,1.5,2480,6383,1,0,0,3,7,1380,1100,1946,0,98119,47.6445, XXXXXXXXXX,1440,6000,6.5
XXXXXXXXXX,1/22/2015,5,3,2010,7264,1,0,0,3,7,1290,720,1990,0,98103,47.6945,-122.33,1510,7326,4.65
XXXXXXXXXX,11/6/2014,2,1,880,5750,1,0,0,3,6,880,0,1939,0,98126,47.5642, XXXXXXXXXX,1190,5750,2.8
XXXXXXXXXX,11/20/2014,3,2.25,1680,8450,1,0,0,3,8,1340,340,1960,0,98177,47.7575, XXXXXXXXXX,1850,8300,3.85
XXXXXXXXXX,11/13/2014,3,1.5,1000,6914,1,0,0,3,7,1000,0,1947,0,98125,47.7144, XXXXXXXXXX,1000,6947,3.78
XXXXXXXXXX,3/18/2015,4,2,1850,9126,1,0,0,5,7,1850,0,1963,0,98059,47.5009, XXXXXXXXXX,1730,9110,3.29
XXXXXXXXXX,9/15/2014,3,3.25,2950,4446,2,0,0,3,9,2450,500,2001,0,98116,47.5852, XXXXXXXXXX,1930,4255,9.3
XXXXXXXXXX,4/14/2015,3,1.75,1670,7210,1,0,0,5,8,1670,0,1967,0,98008,47.6344, XXXXXXXXXX,2200,7210,6.78
XXXXXXXXXX,5/6/2015,5,2.5,3000,10560,1,0,0,3,8,1500,1500,1966,0,98004,47.6249, XXXXXXXXXX,2690,11616,8
XXXXXXXXXX,10/7/2014,3,1.75,1370,9680,1,0,0,4,7,1370,0,1977,0,98074,47.6127, XXXXXXXXXX,1370,10208,4
XXXXXXXXXX,5/27/2014,3,2.5,1590,2550,3,0,0,3,7,1590,0,1985,0,98117,47.6772, XXXXXXXXXX,1260,5100,4.88
XXXXXXXXXX,3/28/2015,2,1.5,980,853,2,0,0,3,7,820,160,2009,0,98144,47.5925, XXXXXXXXXX,1130,1270,3.78
XXXXXXXXXX,9/25/2014,3,2.5,1520,3003,2,0,0,3,7,1520,0,2009,0,98059,47.4876, XXXXXXXXXX,1820,3030,3.3
XXXXXXXXXX,7/24/2014,4,2.5,1770,5000,2,0,0,3,7,1770,0,2004,0,98038,47.3503, XXXXXXXXXX,2080,5100,2.915
XXXXXXXXXX,8/7/2014,2,1,820,6550,1,0,0,3,7,820,0,1949,2012,98126,47.5478, XXXXXXXXXX,1640,6550,3.7
XXXXXXXXXX,10/30/2014,3,1.75,1460,8372,1,0,0,4,7,1460,0,1981,0,98042,47.3683, XXXXXXXXXX,1220,7803,2.25
XXXXXXXXXX,6/25/2014,4,4,3570,8250,2,0,0,3,10,2860,710,2015,0,98040,47.5784, XXXXXXXXXX,2230,10000,5.97326
XXXXXXXXXX,1/27/2015,3,1,960,10181,1,0,0,3,7,960,0,1961,0,98034,47.7231, XXXXXXXXXX,1740,10194,3.22
XXXXXXXXXX,6/27/2014,2,1,1020,8100,1,0,0,3,6,1020,0,1940,0,98168,47.4971, XXXXXXXXXX,1200,12500,1.95
XXXXXXXXXX,3/18/2015,4,2.25,2070,20280,2,0,0,4,7,2070,0,1968,0,98072,47.774, XXXXXXXXXX,2190,21560,4.95
XXXXXXXXXX,2/24/2015,3,1.75,2160,22702,1,0,0,4,7,2160,0,1981,0,98019,47.7355, XXXXXXXXXX,1820,22687,2.6
XXXXXXXXXX,10/23/2014,4,3,2120,13000,2,0,0,4,8,2120,0,1978,0,98059,47.4745, XXXXXXXXXX,2180,11440,3.85
XXXXXXXXXX,6/5/2014,4,2.75,2020,10720,1,0,0,4,8,1420,600,1976,0,98052,47.6373, XXXXXXXXXX,2190,10164,5.55
XXXXXXXXXX,3/26/2015,3,2.5,1400,4800,1,0,0,3,7,1200,200,1921,0,98117,47.6865, XXXXXXXXXX,1440,3840,5.37
XXXXXXXXXX,3/30/2015,3,1.5,1400,9750,1,0,0,4,6,1400,0,1964,0,98030,47.3768,-122.17,1160,9750,2.75
XXXXXXXXXX,12/10/2014,3,2.5,2640,13775,1,0,0,3,8,1550,1090,1978,0,98005,47.5875, XXXXXXXXXX,2120,12432,5.3
XXXXXXXXXX,7/21/2014,3,1.5,2540,9520,1,0,0,3,8,1500,1040,1959,0,98115,47.6834, XXXXXXXXXX,1870,6800,6.9
XXXXXXXXXX,8/15/2014,3,2.25,2680,41250,2,0,0,3,7,2680,0,1984,0,98045,47.4817, XXXXXXXXXX,1940,47044,5.9
XXXXXXXXXX,3/10/2015,3,2.75,2080,9600,1,0,0,3,7,2080,0,1988,0,98028,47.7698, XXXXXXXXXX,2220,9600,4.15
XXXXXXXXXX,11/10/2014,4,2,2410,4680,2,0,0,3,9,2410,0,1974,0,98008,47.6234, XXXXXXXXXX,1910,4611,4.7
XXXXXXXXXX,5/6/2015,4,1.75,1730,7245,1,0,0,4,7,880,850,1955,0,98007,47.5995, XXXXXXXXXX,1550,7245,4.23
XXXXXXXXXX,10/30/2014,3,2.5,2990,10711,1,1,4,3,9,1560,1430,1976,1991,98198,47.3573, XXXXXXXXXX,2870,11476,8.2
XXXXXXXXXX,2/10/2015,3,2.25,1220,5739,1,0,0,3,7,790,430,1984,0,98115,47.6952, XXXXXXXXXX,1870,5739,2.8
XXXXXXXXXX,1/27/2015,2,1.75,1500,4158,1,0,0,4,7,1220,280,1947,0,98103,47.7006,-122.35,1270,4081,3.4
XXXXXXXXXX,8/7/2014,3,1.75,1580,7875,1,0,0,3,7,1580,0,1979,0,98022,47.1958, XXXXXXXXXX,1560,8314,1.95
XXXXXXXXXX,12/11/2014,3,3,1860,2875,2,0,0,3,8,1710,150,2009,0,98126,47.5511, XXXXXXXXXX,1350,4830,3.9995
XXXXXXXXXX