Pandas: 7-day rolling (moving-average) mean
# Load the Starbucks price series with the Date column as a DatetimeIndex,
# then compute a 7-row simple moving average across every numeric column.
import pandas as pd

df = pd.read_csv(
    './UDEMY_TSA_FINAL/Data/starbucks.csv',
    index_col='Date',
    parse_dates=True,
)
df.rolling(window=7).mean()
PySpark: the same rolling mean via a window function, plus per-year averages
# PySpark equivalent of the pandas 7-day rolling mean, followed by a
# per-year summary of Close and Volume.
import pyspark
from pyspark.sql import SparkSession, Window
import pyspark.sql.functions as F

spark = (
    SparkSession.builder
    .appName('spark_test')
    .master("local[*]")
    .getOrCreate()
)

df = spark.read.csv('./UDEMY_TSA_FINAL/Data/starbucks.csv', header=True, inferSchema=True)

# Window of the current row plus the 6 preceding rows — 7 rows total, which
# is why the lower bound is -6 rather than -7.
# NOTE(review): orderBy without partitionBy pulls all rows into one partition;
# acceptable for a small demo series.
window = Window.orderBy('Date').rowsBetween(-6, Window.currentRow)
df.withColumn('mean', F.mean('Close').over(window)).show()

# Cast the Date string to a proper date, derive the year, and average by year.
df2 = df.withColumn('Date', F.to_date(df['Date']))
df3 = df2.withColumn('Year', F.year('Date'))
df3.groupby('Year').agg(F.mean('Close'), F.mean('Volume')).show()