The question:
I’m running SQL Server 2019 Enterprise Edition. Every month, we run a stored procedure that loads millions of records with service dates going back five years. I have 400GB of space allocated across 4 data files, and 100GB of space allocated for the Log file. The job frequently fails because the Log file fills due to Active Transaction. The database is in the simple recovery model. So, I believe it should clear at the end of each transaction. The developer changed the job so that it loops through and loads the records one year at a time.
DROP TABLE IF EXISTS #UnpvtDx;
SELECT ClaimHeader_ID
,ClaimDetail_ID
,ClaimServiceLine
,Unpvt.CodeLine
,Unpvt.DxCode
INTO #UnpvtDx
FROM PRINCE.Claim.ClaimDetail det WITH (NOLOCK)
UNPIVOT
(
DxCode FOR CodeLine
IN
(
Diagnosis1CD,Diagnosis2CD,Diagnosis3CD,Diagnosis4CD,Diagnosis5CD,
Diagnosis6CD,Diagnosis7CD,Diagnosis8CD,Diagnosis9CD,
Diagnosis10CD,Diagnosis11CD,Diagnosis12CD,Diagnosis13CD
)
) as Unpvt ---53 secs
WHERE YEAR(ServiceFromDT) = @year;
DROP TABLE IF EXISTS #UnpvtPointer;
SELECT ClaimHeader_ID
,ClaimDetail_ID
,ClaimServiceLine
,Unpvt.CodeLine
,Unpvt.Pointer
INTO #UnpvtPointer
FROM PRINCE.Claim.ClaimDetail det WITH (NOLOCK)
UNPIVOT
(
Pointer FOR CodeLine
IN (DiagPointer1,DiagPointer2,DiagPointer3,DiagPointer4)
) as Unpvt ---40 secs
WHERE YEAR(ServiceFromDT) = @year;
INSERT INTO PROD.Claim.ClaimDiag
(
ClaimHeader_ID,ClaimDetail_ID,SourceID,EDWLoadDTS,PartnerCD,
PartnerNM,ClaimID,ClaimServiceLine,ClaimStatus,CCOMemberID,
MemberID,PlaceOfServiceCD,ServiceFromDT,ServiceToDT,ClaimForm,
TypeOfBillCD,DiagnosisCD,DiagnosisDESC,DiagPointer
)
SELECT DISTINCT
det.ClaimHeader_ID,det.ClaimDetail_ID,det.SourceID,det.EDWLoadDTS,
det.PartnerCD,det.PartnerNM,det.ClaimID,det.ClaimServiceLine,
det.ClaimStatus,det.CCOMemberID,det.MemberID,det.PlaceOfServiceCD,
det.ServiceFromDT,det.ServiceToDT,det.ClaimForm,det.TypeOfBillCD,
DiagnosisCD = dx.DxCode,
DiagnosisDESC = diag.DiagnosisDESC,
DiagPointer = point.Pointer
FROM PROD.Claim.ClaimDetail det WITH (NOLOCK)
INNER JOIN PROD.Claim.ClaimHeader ch WITH (NOLOCK)
ON ch.ClaimHeader_ID = det.ClaimHeader_ID
INNER JOIN #UnpvtDx dx
ON dx.ClaimDetail_ID = det.ClaimDetail_ID
AND dx.ClaimHeader_ID = det.ClaimHeader_ID
AND dx.ClaimServiceLine = det.ClaimServiceLine
LEFT JOIN #UnpvtPointer point
ON point.ClaimDetail_ID = det.ClaimDetail_ID
AND point.ClaimHeader_ID = det.ClaimHeader_ID
AND point.ClaimServiceLine = det.ClaimServiceLine
LEFT OUTER JOIN Reference.Reference.Diagnosis diag WITH (NOLOCK)
ON dx.DxCode = diag.DiagnosisCD
AND diag.ICDVersion = 'ICD10CM'
AND diag.ActiveFLG = 1
WHERE YEAR(det.ServiceFromDT) = @year;
The stored procedure is executed from a SQL Agent job with this command:
DECLARE @year INT
DECLARE cur CURSOR FOR
SELECT yr = YEAR(hdr.MinServiceFromDT)
FROM PROD.Claim.ClaimHeader hdr WITH (NOLOCK)
GROUP BY YEAR(hdr.MinServiceFromDT)
ORDER BY YEAR(hdr.MinServiceFromDT)
OPEN cur
FETCH NEXT FROM cur INTO @year
WHILE @@FETCH_STATUS = 0
BEGIN
EXEC Claim.sp_UpdateClaimDiag @year
FETCH NEXT FROM cur INTO @year
END
CLOSE cur
DEALLOCATE cur
Is the end of a loop considered a transaction and therefore the log file should empty after every year of records is processed, or does the log file continue to fill until the job has iterated through every loop and loaded the records for all years?
I am also going to increase the Log file to 150GB, but that will max out the available space (without dropping below the 10% buffer).
Posting entire stored procedure code.
USE [Prod]
GO
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE PROCEDURE [Claim].[sp_UpdateClaimDiag] @year INT
AS
BEGIN
SET ANSI_DEFAULTS, ARITHABORT, NOCOUNT ON
SET IMPLICIT_TRANSACTIONS OFF
SET TRANSACTION ISOLATION LEVEL READ COMMITTED
-- variable declaration
DECLARE
@transactional BIT
, @trancount INT
, @err INT
, @procname SYSNAME
, @error INT
, @message VARCHAR(4000)
, @xstate INT
, @RecordCount int;
SELECT @procname = OBJECT_SCHEMA_NAME(@@PROCID, DB_ID()) + '.' + OBJECT_NAME(@@PROCID, DB_ID())
-- 0 = no: will not execute batches inside a transaction; a partial success of procedure is possible
-- 1 = yes: batches in procedure will be bound together by a transaction, partial success is impossible
, @transactional = 0
-- optionally begin transaction and begin try block
IF @transactional = 1 SET @trancount = @@TRANCOUNT
BEGIN TRY
IF @trancount = 0 and @transactional = 1
BEGIN TRANSACTION
ELSE IF @transactional = 1
SAVE TRANSACTION p1
----------------------------------------------------------------------------------------
---Unpivot columns to Rows into Temp tables
----------------------------------------------------------------------------------------
---ICD9CM & ICD10CM
DROP TABLE IF EXISTS #UnpvtDx;
SELECT
ClaimHeader_ID
,ClaimDetail_ID
,ClaimServiceLine
,Unpvt.CodeLine
,Unpvt.DxCode
INTO #UnpvtDx
FROM Prod.Claim.ClaimDetail det WITH (NOLOCK)
UNPIVOT
(
DxCode FOR CodeLine IN
(
Diagnosis1CD,Diagnosis2CD,Diagnosis3CD,
Diagnosis4CD,Diagnosis5CD,Diagnosis6CD,
Diagnosis7CD,Diagnosis8CD,Diagnosis9CD,
Diagnosis10CD,Diagnosis11CD,Diagnosis12CD,
Diagnosis13CD
)
) as Unpvt ---53 secs
WHERE YEAR(ServiceFromDT) = @year;
--Select top 100 * from #UnpvtDx where DxCode is null
DROP TABLE IF EXISTS #UnpvtPointer;
SELECT
ClaimHeader_ID
,ClaimDetail_ID
,ClaimServiceLine
,Unpvt.CodeLine
,Unpvt.Pointer
INTO #UnpvtPointer
FROM Prod.Claim.ClaimDetail det WITH (NOLOCK)
UNPIVOT
(
Pointer FOR CodeLine IN
(
DiagPointer1, DiagPointer2,
DiagPointer3,DiagPointer4
)
) as Unpvt ---40 secs
WHERE YEAR(ServiceFromDT) = @year;
--Select top 100 * from #UnpvtPointer
----------------------------------------------------------------------------------------
--- INSERT INTO yearly records from the temp table
----------------------------------------------------------------------------------------
INSERT INTO Prod.Claim.ClaimDiag (
ClaimHeader_ID,
ClaimDetail_ID,
SourceID,
EDWLoadDTS,
PartnerCD,
PartnerNM,
ClaimID,
ClaimServiceLine,
ClaimStatus,
CCOMemberID,
MemberID,
PlaceOfServiceCD,
ServceFromDT,
ServiceToDT,
ClaimForm,
TypeOfBillCD,
DiagnosisCD,
DiagnosisDESC,
DiagPointer
)
SELECT DISTINCT
det.ClaimHeader_ID,
det.ClaimDetail_ID,
det.SourceID,
det.EDWLoadDTS,
det.PartnerCD,
det.PartnerNM,
det.ClaimID,
det.ClaimServiceLine,
det.ClaimStatus,
det.CCOMemberID,
det.MemberID,
det.PlaceOfServiceCD,
det.ServiceFromDT,
det.ServiceToDT,
det.ClaimForm,
det.TypeOfBillCD,
DiagnosisCD = dx.DxCode,
DiagnosisDESC = diag.DiagnosisDESC,
DiagPointer = point.Pointer
FROM Prod.Claim.ClaimDetail det WITH (NOLOCK)
INNER JOIN Prod.Claim.ClaimHeader ch WITH (NOLOCK)
ON ch.ClaimHeader_ID = det.ClaimHeader_ID
INNER JOIN #UnpvtDx dx
ON dx.ClaimDetail_ID = det.ClaimDetail_ID
AND dx.ClaimHeader_ID = det.ClaimHeader_ID
AND dx.ClaimServiceLine = det.ClaimServiceLine
LEFT JOIN #UnpvtPointer point
ON point.ClaimDetail_ID = det.ClaimDetail_ID
AND point.ClaimHeader_ID = det.ClaimHeader_ID
AND point.ClaimServiceLine = det.ClaimServiceLine
LEFT OUTER JOIN Reference.Reference.Diagnosis diag WITH (NOLOCK)
ON dx.DxCode = diag.DiagnosisCD
AND diag.ICDVersion = 'ICD10CM'
AND diag.ActiveFLG = 1
WHERE YEAR(det.ServiceFromDT) = @year
--AND Year(det.ServiceFromDT) = 2021--for testing
--and det.ClaimID ='21006E06455'--for testing
----------------------------------------------------------------------------------------
--insert into updatelog table
SET @RecordCount = @@ROWCOUNT;
DECLARE @procName1 SYSNAME
SET @procName1 = @procname + ' ' + CAST(@year AS varchar(4))
INSERT INTO Prod.dbo.UpdateLog(EventTimestamp,EventDescription,ProcName,TableName)
SELECT GETDATE(),
'Inserted ' + CAST(@RecordCount AS varchar(100)) + ' records',
@procName1,
'Claim.ClaimDiag'
----------------------------------------------------------------------------------------
DROP TABLE IF EXISTS #UnpvtDx
DROP TABLE IF EXISTS #UnpvtPointer
----------------------------------------------------------------------------------------
SPEXIT:
IF @transactional = 1 and @trancount = 0 COMMIT
END TRY
----------------------------------------------------------------------------------------
-- error handling with catch
BEGIN CATCH
SELECT @error = ERROR_NUMBER(), @message = ERROR_MESSAGE(), @xstate = XACT_STATE()
IF @transactional = 1 and @xstate = -1 ROLLBACK
IF @transactional = 1 and @xstate = 1 and @trancount = 0 ROLLBACK
IF @transactional = 1 and @xstate = 1 and @trancount > 0 ROLLBACK TRANSACTION p1
DROP TABLE IF EXISTS #claims
SET @procName1 = @procname + ' ' + CAST(@year AS varchar(4)) ---+ ', ' + CAST(@month AS varchar(4))
RAISERROR ('%s, Error %d, %s', 16, 1, @procname1, @error, @message)
RETURN @error
END CATCH
RETURN 0
END
GO
The Solutions:
Below are the methods you can try. The first solution is probably the best. Try others if the first one doesn’t work. Senior developers aren’t just copying/pasting – they read the methods carefully & apply them wisely to each case.
Method 1
When using the SIMPLE RECOVERY
model, the transaction log is truncated at each checkpoint. Not necessarily at the end of your transaction. Checkpoints happen every 60 seconds.
Looking at the below section of your code, you’re not actually explicitly starting transaction at all. You initially set @transactional = 0
, and then only start a transaction if @transactional = 1
. Also, you’re only setting @trancount
to a value here based on the IF
statement. The BEGIN TRY
block still starts, since you do not have a BEGIN/END
block following the IF
statement. This is not your problem though, it just means you’re doing implicit transactions each time, instead of creating an explicit transaction.
SELECT @procname = OBJECT_SCHEMA_NAME(@@PROCID, DB_ID()) + '.' + OBJECT_NAME(@@PROCID, DB_ID())
-- 0 = no: will not execute batches inside a transaction; a partial success of procedure is possible
-- 1 = yes: batches in procedure will be bound together by a transaction, partial success is impossible
, @transactional = 0
-- optionally begin transaction and begin try block
IF @transactional = 1 SET @trancount = @@TRANCOUNT
BEGIN TRY
IF @trancount = 0 and @transactional = 1
BEGIN TRANSACTION
ELSE IF @transactional = 1
SAVE TRANSACTION p1
I’d suspect that even at a years worth of data at a time, you’re generating more transaction log throughput than your drive can support. How much space is one year worth of data taking up in this table?
Consider dropping your batch size from a year to a month and see if that helps. While you’re at it, stop surrounding your date column with a YEAR() function. This makes those columns non-SARGABLE, which means SQL Server cannot use any existing index when retrieving the data. This could be causing your transaction to run longer than needed, and thus causing other inflight transactions to be retained in the log while this transaction runs.
Consider the below changes to your stored procedure. I simplified it down a bit to highlight the important parts to be changed. You’ll need to merge it with what you have and test it. This makes it easier to control your batch size when you call the procedure.
Also, I removed the NOLOCK
hints. Read more here to understand why that’s a bad idea. You likely thought you needed the NOLCOK
hints because of the non-SARGABLE where clauses.
CREATE PROCEDURE [Claim].[sp_UpdateClaimDiag]
@Interval int = 30
AS
BEGIN
DECLARE
@MinServiceFromDT DATETIME
@MaxServiceFromDT DATETIME,
@NextServiceFromDT DATETIME;
SELECT
@MinServiceFromDT = MIN(hdr.MinServiceFromDT),
@MaxServiceFromDT = MAX(hdr.MinServiceFromDT)
FROM PROD.Claim.ClaimHeader;
WHILE @MinServiceFromDT <= @MaxServiceFromDT
BEGIN
BEGIN TRY
@NextServiceFromDT = DATEADD(day,@Interval,@MinServiceFromDT);
---ICD9CM & ICD10CM
DROP TABLE IF EXISTS #UnpvtDx;
DROP TABLE IF EXISTS #UnpvtPointer;
--------------------------------------------------------------
---Unpivot columns to Rows into Temp tables
--------------------------------------------------------------
BEGIN TRANSACTION
SELECT ClaimHeader_ID, ...
INTO #UnpvtDx
FROM Prod.Claim.ClaimDetail det
UNPIVOT
(
DxCode FOR CodeLine IN
(
Diagnosis1CD,...
)
) as Unpvt
WHERE ServiceFromDT >= @MinServiceFromDT
AND ServiceFromDT < @NextServiceFromDT;
SELECT ClaimHeader_ID, ...
INTO #UnpvtPointer
FROM Prod.Claim.ClaimDetail det
UNPIVOT
(
Pointer FOR CodeLine IN
(
DiagPointer1, DiagPointer2,
DiagPointer3,DiagPointer4
)
) as Unpvt
WHERE ServiceFromDT >= @MinServiceFromDT
AND ServiceFromDT < @NextServiceFromDT;
--------------------------------------------------------------
--- INSERT INTO yearly records from the temp table
--------------------------------------------------------------
INSERT INTO Prod.Claim.ClaimDiag
(ClaimHeader_ID, ....)
SELECT DISTINCT det.ClaimHeader_ID, ....
FROM Prod.Claim.ClaimDetail det WITH (NOLOCK)
INNER JOIN Prod.Claim.ClaimHeader ch WITH (NOLOCK)
ON ch.ClaimHeader_ID = det.ClaimHeader_ID
INNER JOIN #UnpvtDx dx
ON dx.ClaimDetail_ID = det.ClaimDetail_ID
AND dx.ClaimHeader_ID = det.ClaimHeader_ID
AND dx.ClaimServiceLine = det.ClaimServiceLine
LEFT JOIN #UnpvtPointer point
ON point.ClaimDetail_ID = det.ClaimDetail_ID
AND point.ClaimHeader_ID = det.ClaimHeader_ID
AND point.ClaimServiceLine = det.ClaimServiceLine
LEFT OUTER JOIN Reference.Reference.Diagnosis diag
ON dx.DxCode = diag.DiagnosisCD
AND diag.ICDVersion = 'ICD10CM'
AND diag.ActiveFLG = 1
WHERE det.ServiceFromDT >= @MinServiceFromDT
AND det.ServiceFromDT < @NextServiceFromDT;
--------------------------------------------------------------
--insert into updatelog table
COMMIT TRANSACTION
SET @ MinServiceFromDT = @NextServiceFromDT
END TRY
------------------------------------------------------------------
-- error handling with catch
BEGIN CATCH
<do error handling stuff>
END CATCH
END
END
GO
All methods was sourced from stackoverflow.com or stackexchange.com, is licensed under cc by-sa 2.5, cc by-sa 3.0 and cc by-sa 4.0