from transformers import RobertaTokenizer, RobertaConfig, RobertaForMaskedLM
from transformers import logging
import torch
import torch.nn as nn
from copy import deepcopy
import numpy as np
from tqdm import tqdm
import torch.nn.functional as nnf
from math import ceil


def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
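# Example (illustrative values): identical vectors score 1.0, orthogonal ones 0.0:
#   cosine_similarity(np.array([1.0, 0.0]), np.array([1.0, 0.0]))  # -> 1.0
#   cosine_similarity(np.array([1.0, 0.0]), np.array([0.0, 1.0]))  # -> 0.0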


def embeddings(text, tokenizer, model, cache):
    # Return (and cache) an embedding vector for `text`: the embedding-layer
    # hidden state at position 1, i.e. the first real token after <s>.
    if text in cache:
        return cache[text]
    else:
        inputs = tokenizer(text, return_tensors='pt')
        encoder_out = model(**inputs, output_hidden_states=True)
        # hidden_states[0] is the embedding layer output, before any encoder block.
        embeddings = encoder_out.hidden_states[0][0, 1, :]
        cache[text] = embeddings
        return embeddings
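# For instance, embeddings(' import', tokenizer, model, {}) returns a single
# hidden-size vector (768 dimensions for a RoBERTa-base-sized checkpoint).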


def for_idx(idxs, inputs, model, tokenizer, embeddings_cache):
    # Mask every position in `idxs` at once, run the model, and compare each
    # original token with the model's top prediction for that position.
    originals = []
    originals_embeddings = []
    for idx in idxs:
        original = tokenizer.decode(inputs['input_ids'][0][idx])
        originals.append(original)
        original_embeddings = embeddings(original, tokenizer, model, embeddings_cache)
        originals_embeddings.append(original_embeddings)
    for idx in idxs:
        inputs['input_ids'][0, idx] = tokenizer.mask_token_id
    encoder_output = model(**inputs)
    mask_token_index = torch.where(inputs['input_ids'] == tokenizer.mask_token_id)[1]
    mask_token_logits = encoder_output.logits[0, mask_token_index, :]
    prob = nnf.softmax(mask_token_logits, dim=1)
    top_ps, _ = prob.topk(1, dim=1)
    preds = []
    preds_embeddings = []
    tops = torch.topk(mask_token_logits, 1, dim=1)
    for i in range(len(idxs)):
        pred = tokenizer.decode(tops.indices[i].tolist())
        emb = embeddings(pred, tokenizer, model, embeddings_cache)
        preds.append(pred)
        preds_embeddings.append(emb)
    out = []
    for i in range(len(idxs)):
        similarity = cosine_similarity(originals_embeddings[i].detach().numpy(),
                                       preds_embeddings[i].detach().numpy())
        out.append({
            'idx': idxs[i],
            'original': originals[i],
            'predicted': preds[i],
            'cosine_similarity': similarity,
            'probability': top_ps[i].item(),
        })
    return out
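# Each element of the returned list has this shape (values illustrative):
#   {'idx': 12, 'original': ' import', 'predicted': ' import',
#    'cosine_similarity': 1.0, 'probability': 0.97}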


def process_batch(inputs, mask_ratio, model, tokenizer, embeddings_cache):
    # Stride through the sequence, masking roughly mask_ratio of the tokens per
    # pass so that every position gets masked (and scored) exactly once.
    # e.g. ln = 10, mask_ratio = 0.3 -> n_masks = 3, stride = 3:
    #   i=0 masks [0, 3, 6, 9], i=1 masks [1, 4, 7], i=2 masks [2, 5, 8].
    out = []
    ln = len(inputs['input_ids'][0])
    n_masks = max(1, int(mask_ratio * ln))  # guard against 0 masks on short inputs
    stride = int(ln / n_masks)
    for i in tqdm(range(stride)):
        idxs = []
        for j in range(n_masks + 1):
            val = j * stride + i
            if val < ln:
                idxs.append(val)
        # deepcopy so masking this batch does not mutate the caller's inputs
        out = out + for_idx(idxs, deepcopy(inputs), model, tokenizer, embeddings_cache)
    return out


def process_text(text, mask_ratio=0.1):
    logging.set_verbosity_error()
    model_name = "microsoft/unixcoder-base-nine"
    # note: `device` is computed here but the model is left on the CPU below
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    config = RobertaConfig.from_pretrained(model_name)
    config.is_decoder = False  # run the model as an encoder for masked prediction
    tokenizer = RobertaTokenizer.from_pretrained(model_name)
    model = RobertaForMaskedLM.from_pretrained(model_name, config=config)
    # Tie a fresh LM head to the input embedding matrix (weight tying), since the
    # checkpoint does not appear to ship a trained LM head of its own.
    lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
    lm_head.weight = model.base_model.embeddings.word_embeddings.weight
    model.lm_head = lm_head
    embeddings_cache = {}
    out = []
    # Process the text in 2500-character chunks so each fits the model's window.
    n_batches = int(ceil(len(text) / 2500))
    if n_batches > 1:
        print("Your file is so big it had to be split into {} batches...".format(n_batches))
    for i in range(0, len(text), 2500):
        text_batch = text[i:i + 2500]
        inputs = tokenizer(text_batch, return_tensors='pt', truncation=True, max_length=1024)
        result = sorted(process_batch(inputs, mask_ratio, model, tokenizer, embeddings_cache),
                        key=lambda x: x['idx'])
        out = out + result
    return out
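

# A minimal usage sketch (hypothetical input path and thresholds): flag positions
# where the model confidently predicts something other than the actual token.
if __name__ == '__main__':
    with open('example.py') as f:  # hypothetical file to analyse
        source = f.read()
    for entry in process_text(source, mask_ratio=0.1):
        # entry['probability'] is the model's confidence in its own top prediction.
        if entry['cosine_similarity'] < 0.5 and entry['probability'] > 0.9:
            print('{idx:>5}  {original!r} -> {predicted!r}  '
                  'p={probability:.2f}  sim={cosine_similarity:.2f}'.format(**entry))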