SQLクエリのWHERE句の代替

Question

ストアドプロシージャを介して次のクエリを実行するSQL Server 2012 Enterprise Editionがあります。

declare @TopX int = 1000 declare @stores Table (Store varchar(5), LastDate datetime, LastId int, RangeEnd datetime) insert into @stores select * from (select SourceStore, '2014-01-01' as i, null as ii, '2014-01-08' as iii from StoreConfig.dbo.Version group by SourceStore ) t where (ABS(CAST((BINARY_CHECKSUM(*) * Rand()) as int)) % 100) < 50 IF OBJECT_ID('tempdb..#agreements') IS NOT NULL DROP TABLE #agreements IF OBJECT_ID('tempdb..#stores') IS NOT NULL DROP TABLE #stores select Store, isnull(LastDate, '1899-01-01') StartDate, isnull(LastId, -1) LastId, isnull(RangeEnd, getdate()) RangeEnd into #stores from @stores update #stores set StartDate = '2015-07-01', RangeEnd='2015-07-08' -- THIS IS NOT FAST.. :( SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; --grab the @TopX agreements from primary customers --note: only grabbing the columns in our index to prevent RID lookups --on every agreement before we sort and take a relatively tiny subset select top 1000 a.SourceStore ,a.AgreementId ,isnull(a.ModifiedDate, a.CreatedDate) as ModifiedDate ,ca.CustomerId from #stores s inner join StoreOps.POSREPL3Agreement.Agreement a on a.SourceStore = s.Store inner join StoreOps.Customer.CustomerAgreement ca on ca.SourceStore = a.SourceStore and ca.AgreementId = a.AgreementId and ca.IsPrimary = 1 where ( (a.ModifiedDate between s.StartDate and s.RangeEnd) or ( a.ModifiedDate is null and a.CreatedDate between s.StartDate and s.RangeEnd) ) and ( isnull(a.ModifiedDate, a.CreatedDate) > s.StartDate or a.AgreementId > s.LastId ) order by isnull(a.ModifiedDate, a.CreatedDate), a.AgreementId

[Customer].[CustomerAgreement]テーブルには次のインデックスがあります。

CREATE NONCLUSTERED INDEX [IX_CustomerAgreement_SourceStore_AgreementId] ON [Customer].[CustomerAgreement] ([SourceStore] ASC, [AgreementId] ASC, [IsPrimary] ASC) INCLUDE ([CustomerId]) ON [PRIMARY] GO

そして、これは[POSREPL3Agreement].[Agreement]テーブルのインデックスです：

CREATE NONCLUSTERED INDEX [IX_Agreement_SourceStore_ModifiedDate] ON [POSREPL3Agreement].[Agreement] ([SourceStore] ASC, [ModifiedDate] ASC, [CreatedDate] ASC) INCLUDE ([AgreementId]) ON [PRIMARY] GO

WHERE句を削除するとインデックスは期待どおりに機能し、両方のテーブルから1000レコードが表示されますが、リストされているWHERE句を追加すると、[Customer].[CustomerAgreement]は1000ではなくすべてのレコードを推定します。

どのようにWHERE句またはインデックスを改善して、[Agreement]テーブルを[CustomerAgreement]テーブルと整列させ、[CustomerAgreement]の下の推定行がすべてのレコードではないようにすることができますか？

テーブル定義

CREATE TABLE [Customer].[CustomerAgreement]( [CustomerAgreementId] [int] NOT NULL, [CustomerId] [int] NOT NULL, [AgreementId] [int] NOT NULL, [IsPrimary] [bit] NOT NULL, [Store] [varchar](5) NOT NULL, [SourceStore] [varchar](5) NOT NULL, [RowGUID] [uniqueidentifier] ROWGUIDCOL NOT NULL, [Repl_ID] [tinyint] NOT NULL, CONSTRAINT [PK_Customer_CustomerAgreementID_SourceStore] PRIMARY KEY NONCLUSTERED ( [CustomerAgreementId] ASC, [SourceStore] ASC, [Repl_ID] ASC, [RowGUID] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] ) ON [PRIMARY] CREATE TABLE [POSREPL3Agreement].[Agreement]( [AgreementId] [int] NOT NULL, [QuoteId] [int] NULL, [AgreementNumber] [varchar](11) NOT NULL, [AgreementStatusId] [tinyint] NOT NULL, [AgreementPrinted] [bit] NOT NULL, [IsNewPOSCreated] [bit] NOT NULL, [LeaseFrequencyId] [tinyint] NOT NULL, [DeferredLateFeeAmount] [decimal](8, 2) NOT NULL, [InHomeVisitFeeAmount] [decimal](8, 2) NOT NULL, [IsASPTaxable] [bit] NOT NULL, [ServicePlusRate] [decimal](7, 5) NOT NULL, [ServicePlusFloor] [decimal](5, 2) NOT NULL, [TaxRatePercentage] [decimal](7, 5) NOT NULL, [IgnoreTaxRateChange] [bit] NOT NULL, [Balance] [decimal](8, 2) NOT NULL, [AmountPaidToDate] [decimal](10, 2) NOT NULL, [Deposit] [decimal](8, 2) NOT NULL, [DeliveryFee] [decimal](8, 2) NOT NULL, [StartDate] [datetime] NOT NULL, [DueDay] [int] NOT NULL, [DueDayTypeId] [tinyint] NULL, [PaidThroughDate] [datetime] NOT NULL, [PayOutDate] [datetime] NOT NULL, [FinalDate] [datetime] NULL, [IsNSFOutstanding] [bit] NOT NULL, [LeadSourceId] [tinyint] NOT NULL, [AgreementTypeId] [tinyint] NOT NULL, [AcquisitionAgreementTypeId] [tinyint] NULL, [SameAsCashDuration] [int] NOT NULL, [SameAsCashDurationType] [int] NOT NULL, [MinimumPercentageOfCashPriceForFinalPayment] [decimal](3, 2) NOT NULL, [EarlyPayoutLeaseAmountRate] [decimal](3, 2) NOT NULL, [EarlyPayout] [decimal](10, 2) NULL, [FinalPaymentAdditionalFee] [decimal](8, 2) NOT NULL, [FinalPaymentProrateAmount] [decimal](10, 2) NOT NULL, [CreditedAssociateId] [int] NULL, [NonRenewalGracePeriod] [int] NOT NULL, [LastAgreementTransactionId] [int] NULL, [CanPayout] [bit] NOT NULL, [IsServicePlusIncludedInPayout] [bit] NOT NULL, [IsProrateEnabled] [bit] NOT NULL, [AgreementDocument] [varbinary](max) NULL, [CreatedDate] [datetime] NOT NULL, [CreatedBy] [int] NULL, [ModifiedDate] [datetime] NULL, [ModifiedBy] [int] NULL, [Store] [varchar](5) NOT NULL, [SourceStore] [varchar](5) NOT NULL, [RowGUID] [uniqueidentifier] ROWGUIDCOL NOT NULL, [Repl_ID] [tinyint] NOT NULL, [ECommerceDeliveredDate] [datetime] NULL, [DDEStatusId] [tinyint] NULL, [DDEAmount] [decimal](8, 2) NULL, [OrderMethodTypeId] [int] NULL, [SemiMonthlyUpcharge] [decimal](8, 2) NULL, [DefaultNonRenewalFee] [decimal](8, 2) NULL, [DefaultInHomeVisitFee] [decimal](8, 2) NULL, [NonRenewalSemiMonthlyFee] [decimal](8, 2) NULL, [NonRenewalSemiMonthlyFeeGracePeriod] [int] NULL, [NonRenewalFeeTypeId] [int] NULL, [NonRenewalSemiMonthlyRate] [decimal](8, 4) NULL, [NonRenewalMonthlyRate] [decimal](8, 4) NULL, [NonRenewalWeeklyFeeGracePeriod] [int] NULL, [NonRenewalWeeklyRate] [decimal](8, 4) NULL, [NonRenewalWeeklyFee] [decimal](8, 2) NULL, [DefaultNSFFee] [decimal](8, 2) NULL, [WeeklyUpcharge] [decimal](8, 2) NULL, [IsInHomeFeeEnabled] [bit] NULL, [CanChargeInHomeFeeAndNonRenewalFeeInSamePeriod] [bit] NULL, [ExtensionBalance] [decimal](8, 2) NOT NULL, CONSTRAINT [PK_Agreement_AgreementID_SourceStore] PRIMARY KEY NONCLUSTERED ( [AgreementId] ASC, [SourceStore] ASC, [Repl_ID] ASC, [RowGUID] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] ) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]

実行計画

Showplan XMLダウンロードここ（ペーストビン）

Paul White 9 · Accepted Answer

WHERE句を削除するとインデックスは期待どおりに機能し、両方のテーブルから1000レコードが表示されますが、リストされているWHERE句を追加すると、[Customer].[CustomerAgreement]は1000ではなくすべてのレコードを推定します。

短い答えは、WHERE句の行をフィルタリングしないと、クエリオプティマイザーは、各テーブルから1000行を読み取るだけで必要な1000行の結果を生成できると推定します。

あなたはこのクエリの実行プランを提供しなかったので、それ以上は言えません。この場合、クエリでORDER BY句も省略されている可能性があります。そうでない場合は、通常、並べ替えが必要になる可能性があり、通常はサブツリーからすべての行を読み取る必要があります。

WHERE句を使用すると、オプティマイザーは1000行目がクライアントに返される前に、より多くの行が読み取られることを期待しています（フィルター効果が予想されるため）。読み込まれた行の推定数と実際の数の違いは、利用可能な統計から複雑な述語の選択性を推定する際の問題によるものです。これは、統計がデータを表すと仮定しても当てはまります。基本的に：難しすぎて、オプティマイザはカーディナリティを誤って推定します。

1.インデックス付き計算列ソリューション

以下の変更は価値のあるものです。

#stores一時テーブルにインデックスを付けます。

CREATE UNIQUE CLUSTERED INDEX index_name ON #stores ( Store, StartDate );

計算列を契約表に追加します。これはストレージを使用せず、非常に高速なメタデータのみの操作です。
```
ALTER TABLE POSREPL3Agreement.Agreement ADD ComputedModifiedDate AS ISNULL(ModifiedDate, CreatedDate); 
```
計算列を使用するようにインデックスを作成（または既存のインデックスを変更）します。これはORDER BY句を満たします。
```
CREATE INDEX index_name ON POSREPL3Agreement.Agreement ( ComputedModifiedDate, AgreementId ) INCLUDE (SourceStore); 
```

計算列を直接参照するようにクエリを簡略化します。

SELECT TOP (1000) A.SourceStore, A.AgreementId, ModifiedDate = A.ComputedModifiedDate FROM #stores AS S JOIN POSREPL3Agreement.Agreement AS A ON A.SourceStore = S.Store JOIN Customer.CustomerAgreement AS CA ON CA.SourceStore = A.SourceStore AND CA.AgreementId = A.AgreementId WHERE CA.IsPrimary = 1 AND A.ComputedModifiedDate BETWEEN S.StartDate AND S.RangeEnd AND ( A.ComputedModifiedDate > S.StartDate OR A.AgreementId > S.LastId ) ORDER BY A.ComputedModifiedDate, A.AgreementId;

表示されているようにクエリを書き換えることができない場合、計算された列のインデックスには、技術的な理由で2つの列を追加する必要があります。

CREATE INDEX index_name ON POSREPL3Agreement.Agreement ( ComputedModifiedDate, AgreementId ) INCLUDE ( SourceStore, CreatedDate, ModifiedDate ) WITH DROP_EXISTING;

予想される実行計画は依然として不正確な見積もりを示します（オプティマイザは順序付けられた計算列インデックスのスキャンを停止する速さについてオプティマイザが過度に楽観的であるため）が、（上位Nおよび個別の）ソートは排除され、依然としてより優れたパフォーマンスを発揮するはずです。

2.包括的なインデックス付きビューソリューション

計算列とインデックスを追加できない場合は、代わりにインデックス付きビューを使用して調査できます。

CREATE VIEW dbo.ViewName WITH SCHEMABINDING AS SELECT A.SourceStore, A.AgreementId, ComputedModifiedDate = ISNULL(A.ModifiedDate, A.CreatedDate) FROM POSREPL3Agreement.Agreement AS A JOIN Customer.CustomerAgreement AS CA ON CA.SourceStore = A.SourceStore AND CA.AgreementId = A.AgreementId WHERE CA.IsPrimary = 1; GO CREATE UNIQUE CLUSTERED INDEX index_name ON dbo.ViewName ( ComputedModifiedDate, AgreementId, SourceStore );

クエリは次のようになります。

SELECT TOP (1000) VN.SourceStore, VN.AgreementId, ModifiedDate = VN.ComputedModifiedDate FROM #stores AS S JOIN dbo.ViewName AS VN WITH (NOEXPAND) ON VN.SourceStore = S.Store WHERE VN.ComputedModifiedDate BETWEEN S.StartDate AND S.RangeEnd AND ( VN.ComputedModifiedDate > S.StartDate OR VN.AgreementId > S.LastId ) ORDER BY VN.ComputedModifiedDate, VN.AgreementId;

3.よりシンプルなインデックス付きビューソリューション

計算された列のソリューションをインデックス付きビューにより直接的に反映することも可能ですが、このアイデアは結合を排除しません。

CREATE VIEW dbo.ViewName WITH SCHEMABINDING AS SELECT A.SourceStore, A.AgreementId, ComputedModifiedDate = ISNULL(A.ModifiedDate, A.CreatedDate) FROM POSREPL3Agreement.Agreement AS A; GO CREATE UNIQUE CLUSTERED INDEX index_name ON dbo.ViewName ( ComputedModifiedDate, AgreementId, SourceStore );

今回はクエリは次のようになります。

SELECT TOP (1000) VN.SourceStore, VN.AgreementId, ModifiedDate = VN.ComputedModifiedDate FROM #stores AS S JOIN dbo.ViewName AS VN WITH (NOEXPAND) ON VN.SourceStore = S.Store JOIN Customer.CustomerAgreement AS CA ON CA.SourceStore = VN.SourceStore AND CA.AgreementId = VN.AgreementId WHERE CA.IsPrimary = 1 AND VN.ComputedModifiedDate BETWEEN S.StartDate AND S.RangeEnd AND ( VN.ComputedModifiedDate > S.StartDate OR VN.AgreementId > S.LastId ) ORDER BY VN.ComputedModifiedDate, VN.AgreementId;

ビューでどの列が一意になるかを推測する必要がありました。いずれかのソリューションを実装する場合は、より広いワークロードを考慮し、必要に応じてベーステーブルから一意の列を追加して、ビューのクラスター化インデックスを一意にします。 [RowGUID]列はこのためのお気に入りに見えます。

ベーステーブルのデータ変更に対するインデックス付きビューの影響、および必要なストレージの量を評価するために、慎重にテストする必要があります。

これらのソリューションのいずれかを使用すると、一時テーブルに非常に早い日付範囲が含まれている場合、または最近作成または変更された日付を持たないストアがある場合、パフォーマンスは期待どおりにならない可能性があります。あなたはそれをテストして、それがあなたの実際のデータと要件とどのように調和するかを見るべきです。

分離レベルをREAD UNCOMMITTEDに設定するステートメントは、パフォーマンスを向上させるために必死の試みでのみ存在する場合は、削除する必要があります。詳細は私の記事をご覧ください。

また、両方のテーブルが現在ヒープになっていることにも気づきました。スペース管理の理由から、ほとんどのテーブルはクラスタードインデックスを使用することでメリットがあることに注意してください。ヒープで削除が発生した場合、自動的に割り当て解除された空のページを再利用するために、ヒープを時々再構築する必要がある場合があります。

ちなみに、@ storesクエリのRandは、おそらく考えているような効果はありません。すべての行に同じ値を生成（ランタイム定数）。