pyspark

dataclass로부터 DataFrame schema 추출하기

wefree 2024. 5. 29. 17:55

AI가 생성한 코드

import dataclasses
from dataclasses import fields

from pyspark.sql.types import StructType, StructField, IntegerType, StringType

# Assuming you have a dataclass named User
@dataclasses.dataclass
class User:
    """Sample record type with three annotated fields (two ints and one str)."""
    id: int
    name: str
    age: int

def type_to_spark_type(pytype):
    """Map a Python type annotation to the matching Spark SQL data type.

    Args:
        pytype: The annotation object taken from a dataclass field
            (e.g. the class ``int`` or ``str`` itself, not an instance).

    Returns:
        A new ``IntegerType`` for ``int`` or ``StringType`` for ``str``.

    Raises:
        ValueError: If ``pytype`` has no mapping defined here.
    """
    # The argument is a class object, so compare by identity. An
    # ``isinstance`` check would test whether the class is an *instance*
    # of int/str, which is never true. Identity also keeps ``bool`` (a
    # subclass of ``int``) from being silently mapped to IntegerType.
    if pytype is int:
        return IntegerType()
    if pytype is str:
        return StringType()
    # Add more conditions for other types as needed
    raise ValueError(f"Unsupported type: {pytype}")


# Get the fields of the User class
user_fields = fields(User)

# Create a StructType and StructField for each field.
# The converter above must already be defined when this comprehension runs.
user_schema = StructType([
    StructField(field.name, type_to_spark_type(field.type), True)
    for field in user_fields
])

print(user_schema)
StructType(List(
    StructField(id, IntegerType, True),
    StructField(name, StringType, True),
    StructField(age, IntegerType, True)
))