#import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Create a DataFrame from the Zomato restaurant CSV.
# The path is a raw string so the Windows backslashes stay literal; in a
# normal string "\d" and "\z" are invalid escape sequences, which newer
# Python versions warn about and will eventually reject.
df = pd.read_csv(r"E:\dataset\zomato.csv", encoding='latin-1')
# Preview the first rows and the (rows, columns) dimensions.
df.head()
df.shape
(9551, 21)
# Show the column labels of the restaurant DataFrame.
df.columns
Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address', 'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines', 'Average Cost for two', 'Currency', 'Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu', 'Price range', 'Aggregate rating', 'Rating color', 'Rating text', 'Votes'], dtype='object')
# Structural summary and descriptive statistics of the dataset
df.info()
df.describe()
# Missing values: count the null cells in every column
null_mask = df.isnull()
null_mask.sum()
# Names of the columns that contain at least one null cell
[name for name, has_null in null_mask.any().items() if has_null]
# Heatmap of the null mask (row tick labels and colour bar switched off)
sns.heatmap(null_mask, cmap='viridis', cbar=False, yticklabels=False)
# Load the country-code lookup table from Excel.
# The path is a raw string so the Windows backslashes stay literal; in a
# normal string "\d" and "\C" are invalid escape sequences, which newer
# Python versions warn about and will eventually reject.
df_country = pd.read_excel(r'E:\dataset\Country-Code.xlsx')
df_country
# Column labels of the restaurant DataFrame.
df.columns
Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address', 'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines', 'Average Cost for two', 'Currency', 'Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu', 'Price range', 'Aggregate rating', 'Rating color', 'Rating text', 'Votes'], dtype='object')
# Left-join the country lookup onto the restaurant rows via 'Country Code'
df1 = df.merge(df_country, how='left', on='Country Code')
df1.head(3)
df1.dtypes
# Number of restaurants recorded for each country
country_counts = df1['Country'].value_counts()
country_counts
# The country names are the index of that count series
country_names = country_counts.index
country_names
Index(['India', 'United States', 'United Kingdom', 'Brazil', 'South Africa', 'UAE', 'New Zealand', 'Turkey', 'Australia', 'Phillipines', 'Indonesia', 'Qatar', 'Singapore', 'Sri Lanka', 'Canada'], dtype='object')
# Restaurant counts per country as a plain NumPy array
country_values = df1['Country'].value_counts().to_numpy()
country_values
array([8652, 434, 80, 60, 60, 60, 40, 34, 24, 22, 21, 20, 20, 20, 4], dtype=int64)
# Pie chart of the restaurant counts for every country
plt.pie(x=country_values, labels=country_names)
# Top-3 countries that use Zomato
top3_values, top3_names = country_values[:3], country_names[:3]
plt.pie(top3_values, labels=top3_names)
# Same chart with each wedge annotated with its percentage share
plt.pie(top3_values, labels=top3_names, autopct="%1.2f%%")
# Show the column labels of the merged DataFrame.
df1.columns
Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address', 'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines', 'Average Cost for two', 'Currency', 'Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu', 'Price range', 'Aggregate rating', 'Rating color', 'Rating text', 'Votes', 'Country'], dtype='object')
# Number of restaurants for every (aggregate rating, rating colour,
# rating text) combination.
df1.groupby(['Aggregate rating', 'Rating color', 'Rating text']).size()
# Flatten the grouped counts into a DataFrame. The count column is named
# explicitly: a bare reset_index() would label it 0, and the bar plot below
# looks the column up as "Rating Count".
ratings = (
    df1.groupby(['Aggregate rating', 'Rating color', 'Rating text'])
    .size()
    .reset_index(name='Rating Count')
)
ratings
ratings.head()
# Bar chart: restaurant count for each aggregate rating value.
sns.barplot(x="Aggregate rating", y="Rating Count", data=ratings)
plt.show()
import matplotlib

# Default figure size for the plots that follow: 12 wide by 6 high
matplotlib.rcParams.update({'figure.figsize': (12, 6)})
# Bar chart of "Rating Count" against "Aggregate rating", with bars coloured by 'Rating color'.
# NOTE(review): the call below is cut off mid-argument in this export (the palette list is
# unterminated and the closing parenthesis is missing) — restore it from the original notebook.
sns.barplot(x="Aggregate rating",y="Rating Count",hue='Rating color',palette=['White','Blue
plt.show()
# Count plot over the "Rating color" column of the ratings table.
# NOTE(review): this call is also cut off inside its palette list — restore it from the
# original notebook before running.
sns.countplot(x="Rating color",data=ratings,palette=['Blue','Blue','Red','Orange','Yellow',
plt.show()
# Find the countries whose restaurants have a 0 rating.
# NOTE(review): only the column listing is present here; the query that answers
# this question does not appear in this section.
df1.columns
Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address', 'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines', 'Average Cost for two', 'Currency', 'Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu', 'Price range', 'Aggregate rating', 'Rating color', 'Rating text', 'Votes', 'Country'], dtype='object')
# Find out which currency is used by which country.
# Column labels of the merged DataFrame.
df1.columns
Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address', 'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines', 'Average Cost for two', 'Currency', 'Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu', 'Price range', 'Aggregate rating', 'Rating color', 'Rating text', 'Votes', 'Country'], dtype='object')
# Row count for every (country, currency) pair
df1.groupby(['Country', 'Currency']).size().reset_index()
# Which countries have online delivery: restaurants per country among the
# rows whose 'Has Online delivery' value is 'Yes'
delivers_online = df1['Has Online delivery'].eq('Yes')
df1.loc[delivers_online, 'Country'].value_counts()
India 2423
UAE 28
Name: Country, dtype: int64
# Row count for every ('Has Online delivery', 'Country') pair.
# NOTE(review): the statement is cut off after ".re" in this export — presumably
# ".reset_index()"; confirm against the original notebook.
df1[['Has Online delivery','Country']].groupby(['Has Online delivery','Country']).size().re
# Create a pie chart of the top-5 cities distribution.
# Column labels of the merged DataFrame.
df1.columns
Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address', 'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines', 'Average Cost for two', 'Currency', 'Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu', 'Price range', 'Aggregate rating', 'Rating color', 'Rating text', 'Votes', 'Country'], dtype='object')
# City names taken from the index of the per-city restaurant counts.
df1.City.value_counts().index
Index(['New Delhi', 'Gurgaon', 'Noida', 'Faridabad', 'Ghaziabad', 'Ahmedabad', 'Guwahati', 'Lucknow', 'Amritsar', 'Bhubaneshwar', ... 'Dicky Beach', 'Forrest', 'Vineland Station', 'Potrero', 'Mohali', 'Lakes Entrance', 'Bandung', 'Huskisson', 'Princeton', 'Lincoln'], dtype='object', length=141)
# Per-city restaurant counts, split into the count array and the city names
city_counts = df1['City'].value_counts()
city_values = city_counts.values
city_labels = city_counts.index
# Pie chart of the first five cities in that ordering
plt.pie(x=city_values[:5], labels=city_labels[:5])
([<matplotlib.patches.Wedge at 0x192866bc400>, <matplotlib.patches.Wedge at 0x192866bc190>, <matplotlib.patches.Wedge at 0x192866e8c70>, <matplotlib.patches.Wedge at 0x192866e84c0>, <matplotlib.patches.Wedge at 0x192866e8820>], [Text(-0.6145352824185932, 0.9123301960708633, 'New Delhi'), Text(0.0623675251198054, -1.0982305276263407, 'Gurgaon'), Text(0.8789045225625368, -0.6614581167535246, 'Noida'), Text(1.0922218418223437, -0.13058119407559224, 'Faridabad'), Text(1.099946280005612, -0.010871113182029924, 'Ghaziabad')])
# Same top-5 city pie, with each wedge annotated with its percentage share
top5_values, top5_labels = city_values[:5], city_labels[:5]
plt.pie(top5_values, labels=top5_labels, autopct='%1.2f%%')
([<matplotlib.patches.Wedge at 0x19285c4c4f0>, <matplotlib.patches.Wedge at 0x19285c4c7f0>, <matplotlib.patches.Wedge at 0x19285c4c790>, <matplotlib.patches.Wedge at 0x19285c158e0>, <matplotlib.patches.Wedge at 0x19285c152b0>], [Text(-0.6145352824185932, 0.9123301960708633, 'New Delhi'), Text(0.0623675251198054, -1.0982305276263407, 'Gurgaon'), Text(0.8789045225625368, -0.6614581167535246, 'Noida'), Text(1.0922218418223437, -0.13058119407559224, 'Faridabad'), Text(1.099946280005612, -0.010871113182029924, 'Ghaziabad')], [Text(-0.3352010631374145, 0.497634652402289, '68.87%'), Text(0.0340186500653484, -0.5990348332507311, '14.07%'), Text(0.47940246685229276, -0.36079533641101336, '13.59%'), Text(0.5957573682667329, -0.07122610585941394, '3.16%'), Text(0.5999706981848791, -0.005929698099289049, '0.31%')])
Silan Software is one of India's leading providers of offline & online training for Java, Python, AI (Machine Learning, Deep Learning), Data Science, Software Development & many more emerging technologies.
We provide Academic Training || Industrial Training || Corporate Training || Internship || Java || Python || AI using Python || Data Science etc