pyspark
dataclass로부터 DataFrame schema 추출하기
wefree
2024. 5. 29. 17:55
AI 가 생성한 코드
import dataclasses
from dataclasses import fields

from pyspark.sql.types import StructType, StructField, IntegerType, StringType
# Assuming you have a dataclass named User
@dataclasses.dataclass
class User:
    """Example dataclass used as the source for the Spark schema."""

    id: int
    name: str
    age: int
def type_to_spark_type(pytype):
    """Map a Python type annotation to the matching Spark data type.

    Args:
        pytype: The annotation taken from a dataclass field (``field.type``),
            i.e. the class itself, such as ``int`` or ``str``.

    Returns:
        A new ``IntegerType`` for ``int`` or a new ``StringType`` for ``str``.

    Raises:
        ValueError: If ``pytype`` is not one of the supported types.
    """
    # ``field.type`` is the annotation class itself, not a value of that
    # class, so compare by identity; ``isinstance(int, int)`` is False and
    # would make every field fall through to the error below.
    if pytype is int:
        return IntegerType()
    if pytype is str:
        return StringType()
    # Add more conditions for other types as needed
    raise ValueError(f"Unsupported type: {pytype}")


# Get the fields of the User class
user_fields = fields(User)

# Create a StructType with one nullable StructField per dataclass field.
# This must run after type_to_spark_type is defined, because the
# comprehension calls it immediately.
user_schema = StructType([
    StructField(field.name, type_to_spark_type(field.type), True)
    for field in user_fields
])

print(user_schema)
StructType(List(
StructField(id, IntegerType, True),
StructField(name, StringType, True),
StructField(age, IntegerType, True)
))