pandas & duckdb

DuckDB에서 PySpark API 사용하기

wefree 2025. 10. 31. 22:18

https://duckdb.org/docs/stable/clients/python/spark_api

from duckdb.experimental.spark.sql import SparkSession as session
from duckdb.experimental.spark.sql.functions import lit, col
import pandas as pd

# Small single-column pandas frame used as the input data.
pandas_df = pd.DataFrame({'age': [10, 20, 30]})

# Obtain a session through the Spark-style builder exposed by DuckDB's
# experimental spark module (imported above as `session`).
spark = session.builder.getOrCreate()

# Hand the pandas frame to the session to get a Spark-style DataFrame.
df = spark.createDataFrame(pandas_df)

# Project the 'age' column, then convert back to pandas to preview the rows.
# The trailing expression is left bare so a notebook/REPL displays it.
res = df.select(col('age'))
res.toPandas().head()