simulateD {FlexRL} | R Documentation |
simulateD
Description
simulateD
Usage
simulateD(
data,
linksR,
sumRowD,
sumColD,
truepivsA,
truepivsB,
gamma,
eta,
alpha,
phi
)
Arguments
data |
A list with elements:
|
linksR |
A matrix of 2 columns with indices of the linked records. |
sumRowD |
A boolean vector indicating, for each row of the linkage matrix, i.e. for each record in the smallest file A, whether the record has a link in B or not. |
sumColD |
A boolean vector indicating, for each column of the linkage matrix, i.e. for each record in the largest file B, whether the record has a link in A or not. |
truepivsA |
A matrix of the shape of data source A, representing the true values of the PIVs underlying the registered values present in A. |
truepivsB |
A matrix of the shape of data source B, representing the true values of the PIVs underlying the registered values present in B. |
gamma |
The proportion of linked records as a fraction of the smallest file. |
eta |
The distribution weights for the PIVs. |
alpha |
The parameter involved in the survival model for the probability of true values to coincide (parameter for instability). |
phi |
The proportion of mistakes and missing for the PIVs. |
Value
A list with:
new set of links
new sumRowD
new sumColD
new value of the complete log likelihood
new number fo linked records
Examples
PIVs_config = list( V1 = list(stable = TRUE),
V2 = list(stable = TRUE),
V3 = list(stable = TRUE),
V4 = list(stable = TRUE),
V5 = list( stable = FALSE,
conditionalHazard = FALSE,
pSameH.cov.A = c(),
pSameH.cov.B = c()) )
PIVs = names(PIVs_config)
PIVs_stable = sapply(PIVs_config, function(x) x$stable)
Nval = c(6, 7, 8, 9, 15)
NRecords = c(13, 15)
Nlinks = 6
PmistakesA = c(0.02, 0.02, 0.02, 0.02, 0.02)
PmistakesB = c(0.02, 0.02, 0.02, 0.02, 0.02)
PmissingA = c(0.007, 0.007, 0.007, 0.007, 0.007)
PmissingB = c(0.007, 0.007, 0.007, 0.007, 0.007)
moving_params = list(V1=c(),V2=c(),V3=c(),V4=c(),V5=c(0.28))
enforceEstimability = TRUE
DATA = DataCreation( PIVs_config,
Nval,
NRecords,
Nlinks,
PmistakesA,
PmistakesB,
PmissingA,
PmissingB,
moving_params,
enforceEstimability)
A = DATA$A
B = DATA$B
Nvalues = DATA$Nvalues
encodedA = A
encodedB = B
encodedA[,PIVs][ is.na(encodedA[,PIVs]) ] = 0
encodedB[,PIVs][ is.na(encodedB[,PIVs]) ] = 0
dataForStEM = list( A = encodedA,
B = encodedB,
Nvalues = Nvalues,
PIVs_config = PIVs_config,
controlOnMistakes = c(TRUE, TRUE, TRUE, TRUE, TRUE),
sameMistakes = TRUE,
phiMistakesAFixed = TRUE,
phiMistakesBFixed = TRUE,
phiForMistakesA = c(NA, NA, NA, NA, 0),
phiForMistakesB = c(NA, NA, NA, NA, 0)
)
initDeltaMap()
linksR = base::matrix(0,0,2)
linksCpp = linksR
sumRowD = rep(0, nrow(dataForStEM$A))
sumColD = rep(0, nrow(dataForStEM$B))
nlinkrec = 0
survivalpSameH = base::matrix(1, nrow(linksR), length(dataForStEM$Nvalues))
gamma = 0.5
eta = lapply(dataForStEM$Nvalues, function(x) rep(1/x,x))
phi = lapply(dataForStEM$Nvalues, function(x) c(0.9,0.9,0.1,0.1))
nCoefUnstable = lapply( seq_along(PIVs_stable),
function(idx)
if(PIVs_stable[idx]){ 0 }
else{
ncol(dataForStEM$A[, dataForStEM$PIVs_config[[idx]]$pSameH.cov.A,
drop=FALSE]) +
ncol(dataForStEM$B[, dataForStEM$PIVs_config[[idx]]$pSameH.cov.B,
drop=FALSE]) +
1 } )
alpha = lapply( seq_along(PIVs_stable),
function(idx) if(PIVs_stable[idx]){ c(-Inf) }else{
rep(log(0.05), nCoefUnstable[[idx]]) })
newTruePivs = simulateH(data=dataForStEM, links=linksCpp, survivalpSameH=survivalpSameH,
sumRowD=sumRowD, sumColD=sumColD, eta=eta, phi=phi)
truepivsA = newTruePivs$truepivsA
truepivsB = newTruePivs$truepivsB
Dsample = simulateD(data=dataForStEM, linksR=linksR, sumRowD=sumRowD, sumColD=sumColD,
truepivsA=truepivsA, truepivsB=truepivsB,
gamma=gamma, eta=eta, alpha=alpha, phi=phi)
linksCpp = Dsample$links
linksR = linksCpp + 1