import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from matplotlib import pyplot as plt

import os
# Show which files are present in the input directory.
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

['googleplaystore.csv']

# Load the Google Play Store CSV into a DataFrame.
df = pd.read_csv("../input/googleplaystore.csv")
# Preview of the first rows; the value is only displayed when run as a notebook cell.
df.head()

# Column-name lists; presumably the model inputs and the target — neither is
# used in the visible part of the file (TODO confirm against later cells).
features = ['Category','Rating','Reviews','Size','Type','Price','Content Rating']
y = ['Installs']

Dropping the row with app name 'Life Made WI-Fi Touchscreen Photo Frame', because its values are shifted by one column:

# Remove one row by app name; the column index is printed before, during and
# after so the effect of set_index/reset_index is visible.
print(df.columns)
# Make "App" the index so the row can be dropped by its label.
df = df.set_index("App")
print(df.columns)
# drop() raises KeyError if no row carries this label.
df = df.drop("Life Made WI-Fi Touchscreen Photo Frame")
# Turn "App" back into a regular column; the row index becomes a fresh 0..n-1 range.
df = df.reset_index()
print(df.columns)
#df[df['App'] == 'Life Made WI-Fi Touchscreen Photo Frame']

Index(['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type',
       'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver',
       'Android Ver'],
      dtype='object')
Index(['Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price',
       'Content Rating', 'Genres', 'Last Updated', 'Current Ver',
       'Android Ver'],
      dtype='object')
Index(['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type',
       'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver',
       'Android Ver'],
      dtype='object')

Now we impute the median rating for all NaN values in the Rating column. Remaining steps: 1. Remove NaNs from Type and the other features. 2. Write a function that changes the value of Type (Free = 0, Paid = 1). 3. Plot individual graphs.

# Fill missing ratings with the median of the Rating column.
# The median is computed on the Rating column alone because DataFrame.median()
# raises on the text columns in recent pandas.  The filled column is assigned
# back instead of using inplace=True on a selected column, which does not
# update df when pandas copy-on-write is active.
df['Rating'] = df['Rating'].fillna(df['Rating'].median())

Function for converting Size into a uniform unit (megabytes, "M"). Values without a size unit, such as "Varies with device", are mapped to -1.

#print(df['Size'])

def size_convert(d):
    """Convert a row's size string to a number of megabytes.

    Parameters
    ----------
    d : pandas.Series or sequence
        One data row.  For a Series the value under the ``"Size"`` label is
        used (falling back to position 4 when that label is absent); for any
        other sequence the element at position 4 is used.

    Returns
    -------
    float or int
        The numeric part for values ending in ``"M"``; the numeric part
        divided by 1000 for values ending in ``"k"``/``"K"``; ``-1`` for
        anything else (e.g. ``"Varies with device"``, an empty string, or a
        non-string value such as NaN).

    Raises
    ------
    ValueError
        If the text before a recognised unit suffix is not a valid number.
    """
    # Look the value up by label: integer keys on a label-indexed Series are
    # deprecated as positional access in recent pandas.
    if isinstance(d, pd.Series):
        s = d["Size"] if "Size" in d.index else d.iloc[4]
    else:
        s = d[4]

    # Empty or non-text values carry no unit, so treat them as "unknown".
    if not isinstance(s, str) or not s:
        return -1

    unit = s[-1]
    if unit == "M":
        return float(s[:-1])
    if unit in ("k", "K"):
        return float(s[:-1]) / 1000
    return -1

# Replace the Size column with the value size_convert returns for each row.
df['Size'] = df.apply(size_convert, axis=1)

def clean_Installs(b):
    """Strip the ``+`` and thousands separators from a row's install count.

    Parameters
    ----------
    b : pandas.Series or sequence
        One data row.  For a Series the value under the ``"Installs"`` label
        is used (falling back to position 5 when that label is absent); for
        any other sequence the element at position 5 is used.

    Returns
    -------
    str
        The value with every ``"+"`` and ``","`` removed, e.g.
        ``"10,000+"`` becomes ``"10000"``.  The result is still a string.
    """
    # Look the value up by label: integer keys on a label-indexed Series are
    # deprecated as positional access in recent pandas.
    if isinstance(b, pd.Series):
        a = b["Installs"] if "Installs" in b.index else b.iloc[5]
    else:
        a = b[5]
    return a.replace("+", "").replace(",", "")
# Replace Installs with the cleaned text for each row (still strings at this point).
df['Installs'] = df.apply(clean_Installs, axis=1)

# Preview of the first 20 rows; only displayed when run as a notebook cell.
df.head(20)

def clean_Price(b):
    """Strip the dollar sign from a row's price.

    Parameters
    ----------
    b : pandas.Series or sequence
        One data row.  For a Series the value under the ``"Price"`` label is
        used (falling back to position 7 when that label is absent); for any
        other sequence the element at position 7 is used.

    Returns
    -------
    str
        The value with every ``"$"`` removed, e.g. ``"$4.99"`` becomes
        ``"4.99"``.  The result is still a string.
    """
    # Look the value up by label: integer keys on a label-indexed Series are
    # deprecated as positional access in recent pandas.
    if isinstance(b, pd.Series):
        a = b["Price"] if "Price" in b.index else b.iloc[7]
    else:
        a = b[7]
    return a.replace("$", "")
# Replace Price with the cleaned text for each row.
df['Price'] = df.apply(clean_Price, axis=1)

# Convert the text columns to numbers; int()/float() raise ValueError on any
# value that is not a valid numeric string.
df.Reviews = df.Reviews.apply(int)
df.Installs = df.Installs.apply(int)
df.Price = df.Price.apply(float)

Setting the single record whose Type is NaN to 'Free':

# Boolean masks: rows whose Type is missing, and rows whose Price is zero.
# Neither mask is used in the visible code.
fil1 = df.Type.isnull()
fil2 = df.Price == 0
# Fill missing Type values with "Free".  The filled column is assigned back
# instead of using inplace=True on a selected column, which does not update
# df when pandas copy-on-write is active.
df['Type'] = df['Type'].fillna("Free")

Making dummies for the types: Free and Paid

#dummy = pd.get_dummies(df['Type'])
#df = pd.concat([df,dummy],axis = 1)
#df.head()

# Categories that keep their own name; every other category is folded into
# "OTHERS" by make_others.
_KEPT_CATEGORIES = frozenset({
    'SPORTS',
    'COMMUNICATION',
    'ENTERTAINMENT',
    'HEALTH_AND_FITNESS',
    'GAME',
    'FAMILY',
    'SOCIAL',
    'PHOTOGRAPHY',
    'TRAVEL_AND_LOCAL',
    'TOOLS',
    'PRODUCTIVITY',
    'VIDEO_PLAYERS',
    'NEWS_AND_MAGAZINES',
    'LIFESTYLE',
    'PERSONALIZATION',
    'LIBRARIES_AND_DEMO',
    'SHOPPING',
})


def make_others(d):
    """Return a row's category, or ``"OTHERS"`` if it is not a kept category.

    Parameters
    ----------
    d : pandas.Series or sequence
        One data row.  For a Series the value under the ``"Category"`` label
        is used (falling back to position 1 when that label is absent); for
        any other sequence the element at position 1 is used.

    Returns
    -------
    object
        The category unchanged when it is one of ``_KEPT_CATEGORIES`` or is
        not a string (e.g. NaN); otherwise the string ``"OTHERS"``.
    """
    # The kept set is a constant, so it is not rebuilt from the DataFrame on
    # every row, and a kept category that is absent from the data no longer
    # raises ValueError.
    if isinstance(d, pd.Series):
        a = d["Category"] if "Category" in d.index else d.iloc[1]
    else:
        a = d[1]

    if isinstance(a, str) and a not in _KEPT_CATEGORIES:
        return "OTHERS"
    return a

# Replace Category with the value make_others returns for each row.
df['Category'] = df.apply(make_others, axis=1)

# Bar chart of the summed Installs for each Category.
df_cat = df.groupby('Category').agg({'Installs': np.sum}).reset_index()
plt.bar(df_cat['Category'], df_cat['Installs'])
plt.title('Category graph')
plt.xlabel('Category')
plt.ylabel('Installs(*10^10)')
plt.xticks(rotation=90)
plt.grid()
plt.show()

# Line chart of the average Installs for each distinct Rating value.
df_rat = df.groupby('Rating').agg({'Installs': np.average}).reset_index()
plt.plot(df_rat['Rating'], df_rat['Installs'], marker='o')
plt.title('Rating graph')
plt.xlabel('Rating')
plt.ylabel('Installs(*10^10)')
#plt.xticks(rotation=90)
plt.grid()
plt.show()

print(df.Reviews.max(),df.Reviews.min())
import numpy as np

# Bin Reviews into buckets 10,000,000 wide.
# The upper edge is derived from the data so the largest review count lands in
# a bin (a fixed stop at the maximum leaves the last edge below it, turning
# that row into NaN).  include_lowest=True keeps apps with 0 reviews, which the
# open left edge of the first bin would otherwise exclude; the first interval
# is therefore labelled with a left edge slightly below 0.
custom_bucket_array = np.arange(0, df.Reviews.max() + 10000000, 10000000)
df["Reviews"] = pd.cut(df["Reviews"], custom_bucket_array, include_lowest=True)

# Average Installs per review bucket.  observed=False keeps empty buckets so
# that fin1 has one row per category, matching `categories` and `ind` below.
fin1 = df.groupby(df.Reviews, observed=False).agg({'Installs':np.average}).reset_index()
#print(fin1)
categories = fin1['Reviews'].cat.categories
# One x position per bucket, and the bar width used by the chart.
ind = np.arange(len(categories))
width = 0.15

78158306 0

# Bar chart of the average Installs for each Reviews bucket.
# The axis label and title name Reviews: the x axis shows the review buckets,
# not ratings.
plt.bar(ind, fin1["Installs"], width, label='Installs')
plt.xticks(ind, categories, rotation=90)
plt.xlabel('Reviews')
plt.ylabel('Installs(*10^10)')
plt.title('Reviews graph')
plt.show()

from scipy import stats

# Bar chart of the summed Installs for each Size, leaving out rows whose Size
# is the -1 placeholder.
df_1 = df.loc[df['Size'] != -1]
df_size = df_1.groupby('Size').agg({'Installs': np.sum}).reset_index()
plt.bar(df_size['Size'], df_size['Installs'])
plt.title('Size graph')
plt.xlabel('Size')
plt.ylabel('Installs(*10^9)')
#plt.xticks(rotation=90)
plt.grid()
plt.show()

# Bar chart of the average Installs for each Type.
df_type = df.groupby('Type').agg({'Installs': np.average}).reset_index()
plt.bar(df_type['Type'], df_type['Installs'])
plt.title('Type graph')
plt.xlabel('Type')
plt.ylabel('Installs(*10^9)')
#plt.xticks(rotation=90)
plt.grid()
plt.show()

# Scatter plot of Installs against Price, one point per row.
plt.scatter(df['Price'], df['Installs'])
plt.title('Price graph')
plt.xlabel('Price')
plt.ylabel('Installs(*10^9)')
#plt.xticks(rotation=90)
plt.grid()
plt.show()

# Keep only rows priced below 290.
df = df.loc[df['Price'] < 290]

# Scatter plot of the summed Installs for each non-zero Price.
df_2 = df.loc[df['Price'] != 0]
df_type = df_2.groupby('Price').agg({'Installs': np.sum}).reset_index()
plt.scatter(df_type['Price'], df_type['Installs'])
plt.title('Price graph')
plt.xlabel('Price')
plt.ylabel('Installs(*10^7)')
#plt.xticks(rotation=90)
plt.grid()
plt.show()

# Bar chart of the average Installs for each Content Rating.
df_cnt = df.groupby('Content Rating').agg({'Installs': np.average}).reset_index()
plt.bar(df_cnt['Content Rating'], df_cnt['Installs'])
plt.title('Content Rating graph')
plt.xlabel('Content Rating')
plt.ylabel('Installs(*10^11)')
plt.xticks(rotation=90)
plt.grid()
plt.show()

	App	Category	Rating	Reviews	Size	Installs	Type	Content Rating	Genres	Last Updated	Current Ver	Android Ver
0	Photo Editor & Candy Camera & Grid & ScrapBook	ART_AND_DESIGN	4.1	159	19M	10,000+	Free	Everyone	Art & Design	January 7, 2018	1.0.0	4.0.3 and up
1	Coloring book moana	ART_AND_DESIGN	3.9	967	14M	500,000+	Free	Everyone	Art & Design;Pretend Play	January 15, 2018	2.0.0	4.0.3 and up
2	U Launcher Lite – FREE Live Cool Themes, Hide ...	ART_AND_DESIGN	4.7	87510	8.7M	5,000,000+	Free	Everyone	Art & Design	August 1, 2018	1.2.4	4.0.3 and up
3	Sketch - Draw & Paint	ART_AND_DESIGN	4.5	215644	25M	50,000,000+	Free	Teen	Art & Design	June 8, 2018	Varies with device	4.2 and up
4	Pixel Draw - Number Art Coloring Book	ART_AND_DESIGN	4.3	967	2.8M	100,000+	Free	Everyone	Art & Design;Creativity	June 20, 2018	1.1	4.4 and up

	App	Category	Rating	Reviews	Size	Installs	Type	Content Rating	Genres	Last Updated	Current Ver	Android Ver
0	Photo Editor & Candy Camera & Grid & ScrapBook	ART_AND_DESIGN	4.1	159	19.0	10000	Free	Everyone	Art & Design	January 7, 2018	1.0.0	4.0.3 and up
1	Coloring book moana	ART_AND_DESIGN	3.9	967	14.0	500000	Free	Everyone	Art & Design;Pretend Play	January 15, 2018	2.0.0	4.0.3 and up
2	U Launcher Lite – FREE Live Cool Themes, Hide ...	ART_AND_DESIGN	4.7	87510	8.7	5000000	Free	Everyone	Art & Design	August 1, 2018	1.2.4	4.0.3 and up
3	Sketch - Draw & Paint	ART_AND_DESIGN	4.5	215644	25.0	50000000	Free	Teen	Art & Design	June 8, 2018	Varies with device	4.2 and up
4	Pixel Draw - Number Art Coloring Book	ART_AND_DESIGN	4.3	967	2.8	100000	Free	Everyone	Art & Design;Creativity	June 20, 2018	1.1	4.4 and up
5	Paper flowers instructions	ART_AND_DESIGN	4.4	167	5.6	50000	Free	Everyone	Art & Design	March 26, 2017	1.0	2.3 and up
6	Smoke Effect Photo Maker - Smoke Editor	ART_AND_DESIGN	3.8	178	19.0	50000	Free	Everyone	Art & Design	April 26, 2018	1.1	4.0.3 and up
7	Infinite Painter	ART_AND_DESIGN	4.1	36815	29.0	1000000	Free	Everyone	Art & Design	June 14, 2018	6.1.61.1	4.2 and up
8	Garden Coloring Book	ART_AND_DESIGN	4.4	13791	33.0	1000000	Free	Everyone	Art & Design	September 20, 2017	2.9.2	3.0 and up
9	Kids Paint Free - Drawing Fun	ART_AND_DESIGN	4.7	121	3.1	10000	Free	Everyone	Art & Design;Creativity	July 3, 2018	2.8	4.0.3 and up
10	Text on Photo - Fonteee	ART_AND_DESIGN	4.4	13880	28.0	1000000	Free	Everyone	Art & Design	October 27, 2017	1.0.4	4.1 and up
11	Name Art Photo Editor - Focus n Filters	ART_AND_DESIGN	4.4	8788	12.0	1000000	Free	Everyone	Art & Design	July 31, 2018	1.0.15	4.0 and up
12	Tattoo Name On My Photo Editor	ART_AND_DESIGN	4.2	44829	20.0	10000000	Free	Teen	Art & Design	April 2, 2018	3.8	4.1 and up
13	Mandala Coloring Book	ART_AND_DESIGN	4.6	4326	21.0	100000	Free	Everyone	Art & Design	June 26, 2018	1.0.4	4.4 and up
14	3D Color Pixel by Number - Sandbox Art Coloring	ART_AND_DESIGN	4.4	1518	37.0	100000	Free	Everyone	Art & Design	August 3, 2018	1.2.3	2.3 and up
15	Learn To Draw Kawaii Characters	ART_AND_DESIGN	3.2	55	2.7	5000	Free	Everyone	Art & Design	June 6, 2018	NaN	4.2 and up
16	Photo Designer - Write your name with shapes	ART_AND_DESIGN	4.7	3632	5.5	500000	Free	Everyone	Art & Design	July 31, 2018	3.1	4.1 and up
17	350 Diy Room Decor Ideas	ART_AND_DESIGN	4.5	27	17.0	10000	Free	Everyone	Art & Design	November 7, 2017	1.0	2.3 and up
18	FlipaClip - Cartoon animation	ART_AND_DESIGN	4.3	194216	39.0	5000000	Free	Everyone	Art & Design	August 3, 2018	2.2.5	4.0.3 and up
19	ibis Paint X	ART_AND_DESIGN	4.6	224399	31.0	10000000	Free	Everyone	Art & Design	July 30, 2018	5.5.4	4.1 and up