In this post I will give you some advice on how to extract data from a large table into a new table. In this case we are using the “INSERT INTO” statement. When working with large tables you want to have a query that runs fast and uses as little logging as possible.
Consider this query
USE [AdventureWorksDW2012]
CREATE TABLE [dbo].[FactInternetSales_new](
[ProductKey] [int] NOT NULL,
[OrderDateKey] [int] NOT NULL,
[DueDateKey] [int] NOT NULL,
[ShipDateKey] [int] NOT NULL,
[CustomerKey] [int] NOT NULL,
[PromotionKey] [int] NOT NULL,
[CurrencyKey] [int] NOT NULL,
[SalesTerritoryKey] [int] NOT NULL,
[SalesOrderNumber] [nvarchar](20) NOT NULL,
[SalesOrderLineNumber] [tinyint] NOT NULL,
[RevisionNumber] [tinyint] NOT NULL,
[OrderQuantity] [smallint] NOT NULL,
[UnitPrice] [money] NOT NULL,
[ExtendedAmount] [money] NOT NULL,
[UnitPriceDiscountPct] [float] NOT NULL,
[DiscountAmount] [float] NOT NULL,
[ProductStandardCost] [money] NOT NULL,
[TotalProductCost] [money] NOT NULL,
[SalesAmount] [money] NOT NULL,
[TaxAmt] [money] NOT NULL,
[Freight] [money] NOT NULL,
[CarrierTrackingNumber] [nvarchar](25) NULL,
[CustomerPONumber] [nvarchar](25) NULL,
[OrderDate] [datetime] NULL,
[DueDate] [datetime] NULL,
[ShipDate] [datetime] NULL
) ON [PRIMARY]
CREATE NONCLUSTERED INDEX IX_FactInternetSales_new ON dbo.FactInternetSales_new
(
CustomerKey
) WITH( STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON [PRIMARY]
DECLARE @t1 DATETIME;
DECLARE @t2 DATETIME;
SET @t1 = GETDATE();
insert into [dbo].[FactInternetSales_new]
select *
from [dbo].[FactInternetSales]
SET @t2 = GETDATE();
SELECT DATEDIFF(millisecond,@t1,@t2) AS elapsed_ms;
drop table [dbo].[FactInternetSales_new]
The output for this query will be

If I change the query so that the destination table is a heap (no index) and adds the “TABLOCK” query hint I will gain a minimally logged operation. And when the data is over I will create my index.
Lets take a look at the script
USE [AdventureWorksDW2012]
CREATE TABLE [dbo].[FactInternetSales_new_fast_one](
[ProductKey] [int] NOT NULL,
[OrderDateKey] [int] NOT NULL,
[DueDateKey] [int] NOT NULL,
[ShipDateKey] [int] NOT NULL,
[CustomerKey] [int] NOT NULL,
[PromotionKey] [int] NOT NULL,
[CurrencyKey] [int] NOT NULL,
[SalesTerritoryKey] [int] NOT NULL,
[SalesOrderNumber] [nvarchar](20) NOT NULL,
[SalesOrderLineNumber] [tinyint] NOT NULL,
[RevisionNumber] [tinyint] NOT NULL,
[OrderQuantity] [smallint] NOT NULL,
[UnitPrice] [money] NOT NULL,
[ExtendedAmount] [money] NOT NULL,
[UnitPriceDiscountPct] [float] NOT NULL,
[DiscountAmount] [float] NOT NULL,
[ProductStandardCost] [money] NOT NULL,
[TotalProductCost] [money] NOT NULL,
[SalesAmount] [money] NOT NULL,
[TaxAmt] [money] NOT NULL,
[Freight] [money] NOT NULL,
[CarrierTrackingNumber] [nvarchar](25) NULL,
[CustomerPONumber] [nvarchar](25) NULL,
[OrderDate] [datetime] NULL,
[DueDate] [datetime] NULL,
[ShipDate] [datetime] NULL
) ON [PRIMARY]
DECLARE @t1 DATETIME;
DECLARE @t2 DATETIME;
SET @t1 = GETDATE();
INSERT INTO [dbo].[FactInternetSales_new_fast_one] WITH (TABLOCK)
select *
from [dbo].[FactInternetSales]
CREATE NONCLUSTERED INDEX IX_FactInternetSales_new ON dbo.[FactInternetSales_new_fast_one]
(
CustomerKey
) WITH( STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
ON [PRIMARY]
SET @t2 = GETDATE();
SELECT DATEDIFF(millisecond,@t1,@t2) AS elapsed_ms;
drop table [dbo].[FactInternetSales_new_fast_one]
This query will run in about half the time on my laptop
