web-dev-qa-db-ja.com

巨大なテーブルへのSQL Serverの挿入が遅い

SQl Server 2008データベースに大きなテーブルがあり、約5億7000万のレコードがあります。毎日20万件程度のトランザクションレコードのファイルを受け取り、このデータをグループ化して合計し、大きなテーブルに挿入するバッチジョブを実行します。

最近、大きなテーブルのクラスター化インデックスをidentity int列に変更する実験を行いました。これにより、挿入が3時間から1時間に短縮されました。テーブルのサイズ)

これは5億7000万行のテーブルです

CREATE TABLE [dbo].[POINTS_EARNED](
[POINTS_EARNED_ID]int identity not null,
[CARD_ID] [int] NOT NULL,
[CYCLE_ID] [int] NOT NULL,
[POINTS_CODE] [int] NOT NULL,
[NO_POINTS] [int] NULL,
[ACCOUNT_ID] [int] NOT NULL,
[CREATED_DATE] [datetime] NULL,
[CREATED_BY] [varchar](20) NULL,
[LAST_MODIFIED_DATE] [datetime] NULL,
[LAST_MODIFIED_BY] [varchar](20) NULL,
[DELETED] [bit] NULL,
CONSTRAINT [PK_POINTS_EARNED] PRIMARY KEY CLUSTERED 
(
[POINTS_EARNED_ID] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, 
IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]

) ON [PRIMARY]

また、いくつかの制約(デフォルトおよび外部キー)とインデックスがあり、これらが問題の原因であるかどうか疑問に思っています。

実行に1時間かかる実際のSQLは次のとおりです。

insert into points_earned (
    card_id,
    cycle_id,
    points_code,
    no_points,
    account_id
    ) 

select pe.card_id, pe.cycle_id, pe.points_code, sum(pe.no_points),pe.account_id
from #points_earned pe 
group by pe.card_id, pe.cycle_id, pe.points_code,pe.account_id

一時テーブル#points_earnedには約200,000行があり、次の構造(インデックスなし)があります

create table #points_earned (
        card_id           int,
        cycle_id    int,
        points_code int,
        card_type   varchar(5),
        no_points   int,
        account_id int
        )

だから、私は私がすべきかどうかについていくつかの意見が欲しい

  • 一時テーブルにインデックスを追加します
  • データを追加する前に大きなテーブルに非クラスター化インデックスを削除してから、それらを再作成します
  • 他のオプションはありますか?

更新-もう少し情報が要求されたように-selectステートメントは挿入なしで2秒で実行されるため、これは問題ではないようです。表

インデックス、(更新)トリガー、および制約は次のとおりです。

    CREATE NONCLUSTERED INDEX [IDX_CYCLE_ID] ON [dbo].[POINTS_EARNED] 
(
    [CYCLE_ID] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IX_ACCOUNT_ID] ON [dbo].[POINTS_EARNED] 
(
    [ACCOUNT_ID] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IX_ACCOUNT_ID_POINTS_CODE] ON [dbo].[POINTS_EARNED] 
(
    [ACCOUNT_ID] ASC,
    [POINTS_CODE] ASC
)
INCLUDE ( [CARD_ID],
[CYCLE_ID],
[NO_POINTS]) WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [RELATION_151_FK] ON [dbo].[POINTS_EARNED] 
(
    [CARD_ID] ASC,
    [CYCLE_ID] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [RELATION_152_FK] ON [dbo].[POINTS_EARNED] 
(
    [POINTS_CODE] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
GO
/****** Object:  Trigger [update_points_earned]    Script Date: 09/13/2013 13:20:54 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TRIGGER [dbo].[update_points_earned] ON [dbo].[POINTS_EARNED]
FOR UPDATE
AS
 BEGIN
  UPDATE points_earned 
  SET Last_Modified_By = USER,
   Last_Modified_Date = GETDATE()  
  FROM
   points_earned t,
   inserted i
  WHERE
   t.card_id = i.card_id AND
   t.cycle_id = i.cycle_id AND
   t.points_code = i.points_code AND
   t.account_id = i.account_id
 END
GO
/****** Object:  Default [DF_POINTS_EARNED_ACCOUNT_ID]    Script Date: 09/13/2013 13:20:54 ******/
ALTER TABLE [dbo].[POINTS_EARNED] ADD  CONSTRAINT [DF_POINTS_EARNED_ACCOUNT_ID]  DEFAULT ((0)) FOR [ACCOUNT_ID]
GO
/****** Object:  Default [DF_POINTS_EARNED_CREATED_DATE]    Script Date: 09/13/2013 13:20:54 ******/
ALTER TABLE [dbo].[POINTS_EARNED] ADD  CONSTRAINT [DF_POINTS_EARNED_CREATED_DATE]  DEFAULT (getdate()) FOR [CREATED_DATE]
GO
/****** Object:  Default [DF_POINTS_EARNED_CREATED_BY]    Script Date: 09/13/2013 13:20:54 ******/
ALTER TABLE [dbo].[POINTS_EARNED] ADD  CONSTRAINT [DF_POINTS_EARNED_CREATED_BY]  DEFAULT (user_name()) FOR [CREATED_BY]
GO
/****** Object:  Default [DF_POINTS_EARNED_DELETED]    Script Date: 09/13/2013 13:20:54 ******/
ALTER TABLE [dbo].[POINTS_EARNED] ADD  CONSTRAINT [DF_POINTS_EARNED_DELETED]  DEFAULT ((0)) FOR [DELETED]
GO
/****** Object:  ForeignKey [FK_POINTS_E_REFERENCE_CYCLE_CA]    Script Date: 09/13/2013 13:20:54 ******/
ALTER TABLE [dbo].[POINTS_EARNED]  WITH CHECK ADD  CONSTRAINT [FK_POINTS_E_REFERENCE_CYCLE_CA] FOREIGN KEY([CARD_ID], [CYCLE_ID])
REFERENCES [dbo].[CYCLE_CARD] ([CARD_ID], [CYCLE_ID])
GO
ALTER TABLE [dbo].[POINTS_EARNED] CHECK CONSTRAINT [FK_POINTS_E_REFERENCE_CYCLE_CA]
GO
/****** Object:  ForeignKey [FK_POINTS_E_REFERENCE_POINTS_C]    Script Date: 09/13/2013 13:20:54 ******/
ALTER TABLE [dbo].[POINTS_EARNED]  WITH NOCHECK ADD  CONSTRAINT [FK_POINTS_E_REFERENCE_POINTS_C] FOREIGN KEY([POINTS_CODE])
REFERENCES [dbo].[POINTS_CODE] ([POINTS_CODE])
GO
ALTER TABLE [dbo].[POINTS_EARNED] CHECK CONSTRAINT [FK_POINTS_E_REFERENCE_POINTS_C]
GO
/****** Object:  ForeignKey [FK_POINTS_EARNED_REF_ACCOUNT]    Script Date: 09/13/2013 13:20:54 ******/
ALTER TABLE [dbo].[POINTS_EARNED]  WITH NOCHECK ADD  CONSTRAINT [FK_POINTS_EARNED_REF_ACCOUNT] FOREIGN KEY([ACCOUNT_ID])
REFERENCES [dbo].[ACCOUNT] ([ACCOUNT_ID])
GO
ALTER TABLE [dbo].[POINTS_EARNED] CHECK CONSTRAINT [FK_POINTS_EARNED_REF_ACCOUNT]

編集2、挿入ステートメントのクエリプラン

  |--Sequence
   |--Index Insert(OBJECT:([Progressive_Points].[dbo].[POINTS_EARNED].[IDX_CYCLE_ID]), SET:([POINTS_EARNED_ID1040] = [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID],[CYCLE_ID1041] = [Progressive_Points].[dbo].[POINTS_EARNED].[CYCLE_ID]) WITH ORDERED PREFETCH)
   |    |--Sort(ORDER BY:([Progressive_Points].[dbo].[POINTS_EARNED].[CYCLE_ID] ASC, [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID] ASC))
   |         |--Table Spool
   |              |--Clustered Index Insert(OBJECT:([Progressive_Points].[dbo].[POINTS_EARNED].[PK_POINTS_EARNED]), SET:([Progressive_Points].[dbo].[POINTS_EARNED].[CARD_ID] = RaiseIfNullInsert([tempdb].[dbo].[#points_earned].[card_id] as [pe].[card_id]),[Progressive_Points].[dbo].[POINTS_EARNED].[CYCLE_ID] = RaiseIfNullInsert([tempdb].[dbo].[#points_earned].[cycle_id] as [pe].[cycle_id]),[Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_CODE] = RaiseIfNullInsert([tempdb].[dbo].[#points_earned].[points_code] as [pe].[points_code]),[Progressive_Points].[dbo].[POINTS_EARNED].[NO_POINTS] = [Expr1006],[Progressive_Points].[dbo].[POINTS_EARNED].[ACCOUNT_ID] = RaiseIfNullInsert([tempdb].[dbo].[#points_earned].[account_id] as [pe].[account_id]),[Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID] = [Expr1007],[Progressive_Points].[dbo].[POINTS_EARNED].[CREATED_DATE] = [Expr1008],[Progressive_Points].[dbo].[POINTS_EARNED].[CREATED_BY] = [Expr1009],[Progressive_Points].[dbo].[POINTS_EARNED].[DELETED] = [Expr1010],[Progressive_Points].[dbo].[POINTS_EARNED].[LAST_MODIFIED_DATE] = NULL,[Progressive_Points].[dbo].[POINTS_EARNED].[LAST_MODIFIED_BY] = NULL) WITH UNORDERED PREFETCH)
   |                   |--Compute Scalar(DEFINE:([Expr1008]=getdate(), [Expr1009]=CONVERT_IMPLICIT(varchar(20),user_name(),0), [Expr1010]=(0)))
   |                        |--Compute Scalar(DEFINE:([Expr1007]=getidentity((1243867498),(8),NULL)))
   |                             |--Top(ROWCOUNT est 0)
   |                                  |--Parallelism(Gather Streams)
   |                                       |--Compute Scalar(DEFINE:([Expr1006]=CASE WHEN [Expr1062]=(0) THEN NULL ELSE [Expr1063] END))
   |                                            |--Hash Match(Aggregate, HASH:([pe].[card_id], [pe].[cycle_id], [pe].[points_code], [pe].[account_id]), RESIDUAL:([tempdb].[dbo].[#points_earned].[card_id] as [pe].[card_id] = [tempdb].[dbo].[#points_earned].[card_id] as [pe].[card_id] AND [tempdb].[dbo].[#points_earned].[cycle_id] as [pe].[cycle_id] = [tempdb].[dbo].[#points_earned].[cycle_id] as [pe].[cycle_id] AND [tempdb].[dbo].[#points_earned].[points_code] as [pe].[points_code] = [tempdb].[dbo].[#points_earned].[points_code] as [pe].[points_code] AND [tempdb].[dbo].[#points_earned].[account_id] as [pe].[account_id] = [tempdb].[dbo].[#points_earned].[account_id] as [pe].[account_id]) DEFINE:([Expr1062]=COUNT_BIG([tempdb].[dbo].[#points_earned].[no_points] as [pe].[no_points]), [Expr1063]=SUM([tempdb].[dbo].[#points_earned].[no_points] as [pe].[no_points])))
   |                                                 |--Parallelism(Repartition Streams, Hash Partitioning, PARTITION COLUMNS:([pe].[card_id], [pe].[cycle_id], [pe].[points_code], [pe].[account_id]))
   |                                                      |--Clustered Index Scan(OBJECT:([tempdb].[dbo].[#points_earned] AS [pe]))
   |--Index Insert(OBJECT:([Progressive_Points].[dbo].[POINTS_EARNED].[IX_ACCOUNT_ID]), SET:([POINTS_EARNED_ID1042] = [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID],[ACCOUNT_ID1043] = [Progressive_Points].[dbo].[POINTS_EARNED].[ACCOUNT_ID]) WITH ORDERED PREFETCH)
   |    |--Sort(ORDER BY:([Progressive_Points].[dbo].[POINTS_EARNED].[ACCOUNT_ID] ASC, [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID] ASC))
   |         |--Table Spool
   |--Assert(WHERE:(CASE WHEN [Expr1050] IS NULL THEN (0) ELSE NULL END))
   |    |--Nested Loops(Left Semi Join, OUTER REFERENCES:([Progressive_Points].[dbo].[POINTS_EARNED].[ACCOUNT_ID], [Expr1068]) WITH UNORDERED PREFETCH, DEFINE:([Expr1050] = [PROBE VALUE]))
   |         |--Index Insert(OBJECT:([Progressive_Points].[dbo].[POINTS_EARNED].[IX_ACCOUNT_ID_POINTS_CODE]), SET:([POINTS_EARNED_ID1044] = [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID],[CARD_ID1045] = [Progressive_Points].[dbo].[POINTS_EARNED].[CARD_ID],[CYCLE_ID1046] = [Progressive_Points].[dbo].[POINTS_EARNED].[CYCLE_ID],[POINTS_CODE1047] = [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_CODE],[NO_POINTS1048] = [Progressive_Points].[dbo].[POINTS_EARNED].[NO_POINTS],[ACCOUNT_ID1049] = [Progressive_Points].[dbo].[POINTS_EARNED].[ACCOUNT_ID]) WITH UNORDERED PREFETCH)
   |         |    |--Table Spool
   |         |--Clustered Index Seek(OBJECT:([Progressive_Points].[dbo].[ACCOUNT].[PK_ACCOUNT]), SEEK:([Progressive_Points].[dbo].[ACCOUNT].[ACCOUNT_ID]=[Progressive_Points].[dbo].[POINTS_EARNED].[ACCOUNT_ID]) ORDERED FORWARD)
   |--Assert(WHERE:(CASE WHEN [Expr1054] IS NULL THEN (0) ELSE NULL END))
   |    |--Nested Loops(Left Semi Join, OUTER REFERENCES:([Progressive_Points].[dbo].[POINTS_EARNED].[CARD_ID], [Progressive_Points].[dbo].[POINTS_EARNED].[CYCLE_ID], [Expr1070]) WITH UNORDERED PREFETCH, DEFINE:([Expr1054] = [PROBE VALUE]))
   |         |--Index Insert(OBJECT:([Progressive_Points].[dbo].[POINTS_EARNED].[RELATION_151_FK]), SET:([POINTS_EARNED_ID1051] = [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID],[CARD_ID1052] = [Progressive_Points].[dbo].[POINTS_EARNED].[CARD_ID],[CYCLE_ID1053] = [Progressive_Points].[dbo].[POINTS_EARNED].[CYCLE_ID]) WITH ORDERED PREFETCH)
   |         |    |--Sort(ORDER BY:([Progressive_Points].[dbo].[POINTS_EARNED].[CARD_ID] ASC, [Progressive_Points].[dbo].[POINTS_EARNED].[CYCLE_ID] ASC, [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID] ASC))
   |         |         |--Table Spool
   |         |--Row Count Spool
   |              |--Index Seek(OBJECT:([Progressive_Points].[dbo].[CYCLE_CARD].[IDX_NCLST_CARD_ID_CYCLE_ID]), SEEK:([Progressive_Points].[dbo].[CYCLE_CARD].[CARD_ID]=[Progressive_Points].[dbo].[POINTS_EARNED].[CARD_ID] AND [Progressive_Points].[dbo].[CYCLE_CARD].[CYCLE_ID]=[Progressive_Points].[dbo].[POINTS_EARNED].[CYCLE_ID]) ORDERED FORWARD)
   |--Assert(WHERE:(CASE WHEN [Expr1057] IS NULL THEN (0) ELSE NULL END))
        |--Merge Join(Left Semi Join, MANY-TO-MANY MERGE:([Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_CODE])=([Progressive_Points].[dbo].[POINTS_CODE].[POINTS_CODE]), RESIDUAL:([Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_CODE]=[Progressive_Points].[dbo].[POINTS_CODE].[POINTS_CODE]))
             |--Index Insert(OBJECT:([Progressive_Points].[dbo].[POINTS_EARNED].[RELATION_152_FK]), SET:([POINTS_EARNED_ID1055] = [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID],[POINTS_CODE1056] = [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_CODE]) WITH ORDERED PREFETCH)
             |    |--Sort(ORDER BY:([Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_CODE] ASC, [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID] ASC))
             |         |--Table Spool
             |--Index Scan(OBJECT:([Progressive_Points].[dbo].[POINTS_CODE].[POINTS_CODES_PK]), ORDERED FORWARD)
24
jazza1000

OK、私がすることは次のとおりです。

  1. 重複する可能性があるため、[IX_ACCOUNT_ID_POINTS_CODE]と[IX_ACCOUNT_ID]の両方のインデックスが必要かどうかを確認してください。

  2. INSERTを実行する前に、トリガーを無効にして外部キーをドロップします。

  3. トリガーによって通常設定されるフィールドをINSERT設定し、FK列の値が有効であることを確認します。

  4. トリガーを再度有効にし、NOCHECKを使用して外部キーを再作成します。

合計行数の0.2%未満を挿入するため、インデックスをオンのままにしておきます。そのため、インデックスを削除して再構築するよりも、インプレースで更新する方がおそらく高速です。

15
RBarryYoung

大規模なテーブルから20万行を一度に削除する代わりに、チャンクしてみてください。例えば。:

while (1=1)
begin
  delete top(1000) from #points_earned
  output deleted.* into points_earned
  if @@rowcount=0 break
end
8
gordy

TABLEに関して、追加する各レコードのパフォーマンスに影響する3つの考慮事項があります。(1)インデックス(2)トリガー(3)外部キー

余裕がある場合は、適切なSAS Drives。

それ以外の場合、毎分数十万件のレコードが更新される同様の状況で、同じインスタンス内の別のデータベース(*)で別のテーブル(例:[POINTS_EARNED_TMP])を使用して、BULK/INSERTの手法を使用します。

トリガー[POINTS_EARNED_TMP]でレコードを追加します。次に、別のBULKから、トリガーなしでプロシージャを設定し、データを[POINTS_EARNED_TMP]から[POINTS_EARNED](USERおよびDATE Updateを含む)にBULK/INSERTします。少なくとも、トリガーのパフォーマンスは回避され、同じインスタンス内の#TMPも回避されます。

(*)別のデータベースの使用は、主にメンテナンスのためです。 BULKは、これまでのところ、INSERT TOと比較して驚くべき結果をもたらします。

1
Little.Buddy