# Example: create a Spark DataFrame
from pyspark import SparkConf, SparkContext
from pyspark.sql import SparkSession
# Obtain the shared SparkSession: reuse the active one if present, else create it.
spark = SparkSession.builder.getOrCreate()
# SparkContext backing the session; not referenced by the code visible in this file.
sc = spark.sparkContext
def create_dataframe():
    """
    Build a small two-column DataFrame and print it with ``show``.

    Uses the module-level ``spark`` session. The DataFrame is printed twice:
    first limited to a single row, then with every row, both times with
    cell truncation disabled. Nothing is returned.
    """
    column_names = ("id", "name")
    rows = [
        (1, "puneetha"),
        (2, "bhoomika"),
    ]
    people_df = spark.createDataFrame(rows, column_names)

    # Print only the first row; truncate=False keeps full cell values.
    people_df.show(n=1, truncate=False)
    # Output:
    # +---+--------+
    # |id |name    |
    # +---+--------+
    # |1  |puneetha|
    # +---+--------+
    # only showing top 1 row

    # Display all records, without truncating: the row count is the limit.
    row_total = people_df.count()
    people_df.show(n=row_total, truncate=False)
    # Output:
    # +---+--------+
    # |id |name    |
    # +---+--------+
    # |1  |puneetha|
    # |2  |bhoomika|
    # +---+--------+
def main() -> None:
    """Script entry point: run the DataFrame creation example."""
    create_dataframe()
# Run the example only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()