Apr-28-2023, 09:33 AM
Hi Team,
I want to insert CSV Records into sql table via python,
my csv file size is 10 gb,
I found this code on google, Can you suggest better way if someone has done this.
I want to insert CSV Records into sql table via python,
my csv file size is 10 gb,
I found this code on google, Can you suggest better way if someone has done this.
import pyodbc
import pandas as pd
import csv
import time
# Define the SQL Server login and password
sql_login = 'XXXX\\XXX-XXX-XXX'
sql_password = 'xxxxxxx'
# Connect to the SQL Server database
connection = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};SERVER=DESKTOP-GQK64O6;DATABASE=Customer;UID=' + sql_login + ';PWD=' + sql_password + ';')
cursor = connection.cursor()
# Define the file path and chunk size
file_path = 'abc.csv'
chunk_size = 1000
# Start the timer
start_time = time.time()
# Create a pandas dataframe iterator to read the CSV file in chunks
csv_iterator = pd.read_csv(file_path, chunksize=chunk_size, sep="|")
# Loop through the dataframe iterator
for chunk in csv_iterator:
# Get the field names for the chunk
field_names = list(chunk.columns)
# Concatenate the field names into a string
field_names_concatenated = ",".join(field_names)
# Create a list of values for the chunk
values = chunk.values.tolist()
# Create a string of percents based on the number of values in each row
percents_concatenated = ",".join(len(field_names) * ["?"])
# Create the SQL statement
sql = f"INSERT INTO employee ({field_names_concatenated}) VALUES ({percents_concatenated})"
# Execute the SQL statement for the chunk
cursor.executemany(sql, values)
connection.commit()
print(cursor.rowcount, "details inserted")
# Calculate the elapsed time
elapsed_time = time.time() - start_time
print(f"Elapsed time: {elapsed_time:.2f} seconds")
