ABOUT ME

-

Today
-
Yesterday
-
Total
-
  • 자료 구조
    pandas & duckdb 2022. 8. 8. 23:58

    https://wikidocs.net/book/4639 참고

     

    import numpy as np
    import pandas as pd
    
    # --- Series basics: construction, index labels, shape attributes ---

    # Build a Series from a NumPy array, forcing the element dtype.
    arr = np.arange(100, 105)
    s = pd.Series(arr, dtype='int32')

    # Without an explicit index, a default RangeIndex (0, 1, 2, ...) is used.
    s = pd.Series(['A', 'B', 'C'])

    s.index  # RangeIndex(start=0, stop=3, step=1)
    s[0]  # 'A'  (0 is a *label* of the RangeIndex here, not a position)
    # NOTE: s[-1] raises here, because -1 is not a label in the RangeIndex.

    # With custom string labels, [] looks values up by label.
    s = pd.Series(['A', 'B', 'C'], index=['a', 'b', 'c'])
    s['a']  # 'A'
    # Positional access goes through .iloc. Plain s[-1] only worked through
    # the integer-key positional fallback of Series.__getitem__, which is
    # deprecated since pandas 2.1 (integer keys are to be treated as labels).
    s.iloc[-1]  # 'C'
    s.index  # Index(['a', 'b', 'c'], dtype='object')
    # The index can be replaced wholesale by assigning new labels.
    s.index = ['d', 'e', 'f']
    s
    # d    A
    # e    B
    # f    C
    # dtype: object


    s = pd.Series(['A', 'B', 'C'])
    s.values  # array(['A', 'B', 'C'], dtype=object)
    s.ndim  # 1
    s.shape  # (3,)
    # The trailing comma is what makes a one-element tuple:
    type((3,))  # tuple
    type((3))  # int
    
    # --- Series indexing: missing values, label lists, masks, slices ---

    # np.nan marks a missing entry.
    s = pd.Series(['A', np.nan, 'B', 'C'])
    s
    # 0      A
    # 1    NaN
    # 2      B
    # 3      C
    # dtype: object

    # Fancy indexing: pass a list of labels to pick several rows at once.
    s = pd.Series(['A', 'B', 'C'], index=['a', 'b', 'c'])
    s.loc['a']  # 'A'
    s.loc[['a', 'c']]
    # a    A
    # c    C
    # dtype: object

    # Boolean indexing: a list of booleans keeps the rows marked True.
    s = pd.Series(['A', 'B', 'C'], index=['a', 'b', 'c'])
    s.loc[[True, False, True]]

    # A comparison on a Series yields a boolean Series usable as a mask.
    s = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
    cond = s >= 3
    cond
    # a    False
    # b    False
    # c     True
    # d     True
    # e     True
    # dtype: bool
    s.loc[cond]
    s.loc[s >= 3]
    # Combine masks with & (and) and | (or); each comparison is parenthesised.
    between_3_and_4 = (s >= 3) & (s <= 4)
    s.loc[between_3_and_4]
    at_most_1_or_at_least_4 = (s <= 1) | (s >= 4)
    s.loc[at_most_1_or_at_least_4]

    # Masks for missing / present values.
    s = pd.Series(['A', np.nan, 'B', 'C'])
    missing = s.isna()
    missing
    # 0    False
    # 1     True
    # 2    False
    # 3    False
    s.loc[missing]
    s.loc[~missing]

    # Slicing: positional slices exclude the stop position ...
    s = pd.Series(['A', 'B', 'C'], index=['a', 'b', 'c'])
    s.iloc[0:2]
    # a    A
    # b    B
    # ... while label slices include the stop label.
    s.loc['a':'c']
    # a    A
    # b    B
    # c    C  <--- 'c' is included
    
    ##################################################################
    
    # --- DataFrame basics: construction, attributes, column selection ---

    # From a list of rows, with explicit column labels.
    rows = [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]
    df = pd.DataFrame(rows, columns=['A', 'B', 'C'])
    df
    #    A  B  C
    # 0  1  2  3
    # 1  4  5  6
    # 2  7  8  9

    # From a dict: keys become column labels, values become the columns.
    people = {
        'name': ['A', 'B', 'C'],
        'age': [1, 2, 3],
        'children': [4, 5, 6],
    }
    df = pd.DataFrame(people)
    df
    #   name  age  children
    # 0    A    1         4
    # 1    B    2         5
    # 2    C    3         6

    df.index  # RangeIndex(start=0, stop=3, step=1)
    df.columns  # Index(['name', 'age', 'children'], dtype='object')
    df.values
    df.dtypes
    # name        object
    # age          int64
    # children     int64
    df.transpose()  # same as df.T: rows and columns swapped
    #           0  1  2
    # name      A  B  C
    # age       1  2  3
    # children  4  5  6


    # Same data, this time with custom row labels.
    df = pd.DataFrame(people, index=['a', 'b', 'c'])
    df
    #   name  age  children
    # a    A    1         4
    # b    B    2         5
    # c    C    3         6

    # Selecting a single column gives a Series ...
    name_column = df['name']
    name_column
    # a    A
    # b    B
    # c    C
    type(name_column)  # pandas.core.series.Series
    # ... while a list of column labels gives a DataFrame.
    df[['name', 'children']]
    
    # --- Renaming columns: returned copy vs. in-place change ---

    df = pd.DataFrame({
        'name': ['A', 'B', 'C'],
        'age': [1, 2, 3],
        'children': [4, 5, 6],
    })
    new_labels = {'name': '이름'}

    # rename() returns a new DataFrame with the relabelled column.
    df.rename(new_labels, axis='columns')
    #   이름  age  children
    # 0  A    1         4
    # 1  B    2         5
    # 2  C    3         6

    df  # the original df is unchanged!
    #    name  age  children
    # 0    A    1         4
    # 1    B    2         5
    # 2    C    3         6

    # Pass inplace=True to apply the change to the original object itself.
    df.rename(new_labels, axis='columns', inplace=True)

     

    참고: https://www.udemy.com/course/pandas-i/

    'pandas & duckdb' 카테고리의 다른 글

    데이터 전처리, 추가, 삭제, 변환  (0) 2022.10.08
    복사와 결측치  (0) 2022.10.08
    통계  (0) 2022.10.08
    조회, 정렬, 필터  (0) 2022.08.20
    Excel, CSV 파일 읽기/쓰기  (0) 2022.08.19

    댓글

Designed by Tistory.