# Step-by-Step Script for File to Table Data Loading in Databricks:
"""Load CSV files from a DBFS mount into a managed Delta Lake table."""
from pyspark.sql import SparkSession

# Initialize (or reuse, on Databricks) the Spark session.
# Parenthesized chaining replaces backslash continuations — the original
# script put a comment after a trailing backslash, which is a SyntaxError.
spark = (
    SparkSession.builder
    .appName("File to Table Data Loading")
    .getOrCreate()
)

# Read every CSV file under the mount point into one DataFrame.
# header=true treats the first row of each file as column names.
df = (
    spark.read.format("csv")
    .option("header", "true")
    .load("dbfs:/mnt/data/csv_files/")
)

# CSV columns are read as strings by default; cast "amount" to double
# so downstream numeric operations behave correctly.
df = df.withColumn("amount", df["amount"].cast("double"))

# Persist the DataFrame as a Delta Lake table.
# Use .mode("append") instead for incremental loading.
(
    df.write.format("delta")
    .mode("overwrite")
    .saveAsTable("my_database.my_table")
)

# Optionally release cluster resources held by this session.
spark.stop()
# NOTE(review): removed blog-page footer text ("No comments: / Post a Comment")
# captured during scraping — it was not part of the script and broke parsing.