DA with py
#隨便寫寫,Python部分有省略~( ̄▽ ̄~) #jupyter shortcut insert cell above : A insert cell below : B blue:command green:edit H: keyboard enter:edit code ctrl + enter : run markdown:insert any type (command mode) # can change the size of the title if press Y. This is a code cell if press M. This is a markdown cell (command mode) press F (find and replace) press number one to five can change the size of markdown press X can cut the cell press V can paste the cell press D *2 can delete the cell press Z can undo ctrl + S save -----------分割線~( ̄▽ ̄~)~---------- #data types int and float bool:True and False "hello world".count('l') -3 "hello world".replace('o','u') -'hellu wurld' -----------分割線~( ̄▽ ̄~)~---------- #variables message_1 = "Hello world" message_2 = "ABA" f'{message_1} {message_2}' -"Hello world ABA" *{}only. Type anything you can -----------分割線~( ̄▽ ̄~)~---------- #list slicing country = ['US', 'China', 'India', 'UK'] list_name[start:stop] country[0:2] -['US', 'China'] country[:2] -['US', 'China'] # add new elements .append('xx') or .insert(1, 'xx') nested_list = [list1, list2] -[['X'],['Y']] #delete elements .remove('xx') or .pop(1) or del countries[1] #sort a list .sort()(從小到大) .sort(reverse = True)(從大到小) #update an element numbers = [1, 2, 3, 9] numbers[0] = 100 numbers -[100, 2, 3, 9] -----------分割線~( ̄▽ ̄~)~---------- #dictionary data = {'aa': 'ss', 'bb':'ff'} data … data.items() data['hh'] = 1 data.update({'hh': 1}) #delete dict data.pop('hh') or del data['hh'] data.clear() -{} -----------分割線~( ̄▽ ̄~)~---------- #if statement age = 10 if age >= 18: print("adult") elif age >= 14: print("teenage") else: print("kid") -----------分割線~( ̄▽ ̄~)~---------- #for loop for i,country in enumerate(countries): print(i) print(country) -0 -US -1 -India -2 -China -3 -US -----------分割線~( ̄▽ ̄~)~---------- #function def values(a,b): x = a + b return x values(1,2) -3 -----------分割線~( ̄▽ ̄~)~---------- #module import os os.getcwd() os.listdir() os.makedirs() -----------分割線~( ̄▽ ̄~)~---------- 
pandas better than Excel data frame: 橫為row,豎為index(索引),索引右邊為series How to create a dataframe 1.arrays 2.dictionary 3.CSV file import pandas as pd import numpy as np #arrays way1 in list create an array data = np.array([[1,4], [2,5], [3,6]]) create a dataframe df = pd.DataFrame(data, index = ['row1', 'row2', 'row3'], columns = ['col1', 'col2']) way2 data = [[1,4], [2,5], [3,6]] df = pd.DataFrame(data, index = ['row1', 'row2', 'row3'], columns = ['col1', 'col2']) way1 in dictionary A = ['aa', 'bb', 'cc'] B = ['dd', 'ee', 'ff'] dict = {'a':A, 'b':B} df = pd.DataFrame(dict) way1 from a CSV file df = pd.read_csv('xxx.csv') df.head() or data = { 'a':['aa', 'bb', 'cc'], 'b':['dd', 'ee', 'ff'] } #see all the rows: df.() reading csv file show first 5 rows in a dataframe: df.head() ……last 5 rows……: df.tail() show first/last n rows in a dataframe: df.head/tail(n) show all the row: df.shape -(xx,xx) display n rows: pd.set_option('display.max_rows',n) (enter)(get all the rows) -----------分割線~( ̄▽ ̄~)~---------- attributes df.shape -(number of rows,number of columns) df.index -RangeIndex(start=0, stop=n,step=1) df.columns -Index(………) df.dtypes -----------分割線~( ̄▽ ̄~)~---------- methods df.info() df.describe() -----------分割線~( ̄▽ ̄~)~---------- functions the number of rows: len(df) max(df.index) type(df) round(df,n) (n是保留幾位小數) -----------分割線~( ̄▽ ̄~)~---------- #select one column syntax1 df['column1'] type(df['column1']) df['column1'].index df['column1'].head() syntax2 df.column1 or df['column'](可不帶_) -----------分割線~( ̄▽ ̄~)~---------- #select two or more columns df[['column1', 'column2']] *We can't select 2 or more columns with the "." -----------分割線~( ̄▽ ̄~)~---------- #add a new column df['new column' ] = n(add in the end. all the values will be same.) 
#add different values import numpy as np new_value = np.arange(0,n) (create an array) df['new_value'] = new_value (from 0 to n-1) #create random integer numbers np.random.randint(min value,max value(don't include),size=n) #create random float numbers np.random.uniform(min value,max value,size=n) -----------分割線~( ̄▽ ̄~)~---------- #math operations in column df['column1'].sum() df['column1'].count()(有幾行) df['column1'].mean()(平均值) df['column1'].std()(標準差) df['column1'].min() df['column1'].max() df.describe()(上面的全有) in rows sum: df['column1'] + df['column2'] + df['column3'] average: df['average'] = (df['column1'] + df['column2'] + df['column3']) / 3 小數點 df.round(n) -----------分割線~( ̄▽ ̄~)~---------- #value counts len(df['column']) or df['column'].count() divide category df['column'].value_counts() 百分比 df['column'].value_counts(normalize=True) -----------分割線~( ̄▽ ̄~)~---------- #sort a dataframe df.sort_values(by='xx') or df.sort_values('xx') df.sort_values('xx',ascending=True(升序)/False(降序)) df.sort_values(['xx','xxx'] ,ascending=False) #update dataframe df.sort_values('xx',ascending=False, inplace=True) -----------分割線~( ̄▽ ̄~)~---------- import pandas as pd #pivot() df = pd.read_csv('xxx.csv') df.pivot(index="xx", columns="xxx", values="xxxx") (shift+tab can get three arguments:行,列,數據) (最上頭一行是columns的值,下面一豎行是index的值,數值是values的數) #pivot_table() df = pd.read_excel("xx") df.pivot_table(index="xx",aggfunc="xxx") get only column: df.pivot_table(index="xx", values=["column1","column2"], aggfunc="xxx") df.pivot_table(index="xx",
columns="column1的值",
values="column1",
aggfunc="xxx")
-----------分割線~( ̄▽ ̄~)~----------
import pandas as pd
df = pd.read_csv('xx')
#making a pivot table
drop null values
df.dropna(inplace=True) or
df = df.dropna()
make a pivot table:
df_pivot = df.pivot(index='x',columns='xx',
values='xxx')
select the value in columns:
df_pivot = df_pivot[['xx', 'xxx', 'xxxx']]
df_pivot (run!)
-----------分割線~( ̄▽ ̄~)~----------
#lineplot
df_pivot.plot(kind='line',xlabel='xx',
ylabel='x',title='xx',
figsize=(1,2))
^是個元組
(橫坐標 縱坐標 標題 坐標軸的尺寸)
#barplot
##一個變量的條形圖
select one index
df = df_pivot[df_pivot.index.isin([xxx])]
df_pivot = df.T
(然后表格變成T形賬了!(bushi)
df_pivot.plot(kind='bar',color='blue',
xlabel='xx',ylabel='xx',
title='xxx')
##n個變量的條形圖
df = df_pivot[df_pivot.index.isin([xx,xxxx,xxxxx,xxx])]
df.plot(kind='bar')
(跟上頭一樣可以改橫縱坐標之類的)
-----------分割線~( ̄▽ ̄~)~----------
#piechart
df.rename(columns={xx:'xx'}, inplace=True)
(把數字變成字符串)
df.plot(kind='pie', y='xx')
-----------分割線~( ̄▽ ̄~)~----------
#lineplot
import matplotlib.pyplot as plt
save plot:
plt.savefig('my_text.png')
show plot:
plt.show()
df_pivot.plot(kind='line',xlabel='xx',
ylabel='x',title='xx',
figsize=(1,2))
導出數據透視表:
df_pivot.to_excel('pivot_table.xlsx')