You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

463 lines
16 KiB

/*
Package mft provides functions to parse records and their attributes in an NTFS Master File Table ("MFT" for short).

Basic usage

First parse a record using mft.ParseRecord(), which parses the record header and the attribute headers. Then parse
each attribute's data individually using the various mft.Parse...() functions.

	// Error handling left out for brevity
	record, err := mft.ParseRecord(recordBytes)
	attrs := record.FindAttributes(mft.AttributeTypeFileName)
	fileName, err := mft.ParseFileName(attrs[0])
*/
package mft
import (
var (
	// fileSignature is the 4-byte signature ("FILE" in ASCII) that ParseRecord expects at the start of a record.
	fileSignature = []byte{0x46, 0x49, 0x4c, 0x45}
)

// maxInt is the largest value of the platform's int type, widened to int64 so unsigned 32-bit lengths can be
// range-checked before they are converted to int.
const maxInt = int64(^uint(0) >> 1)
// A Record represents an MFT entry, excluding all technical data (such as "offset to first attribute"). The Attributes
// list only contains the attribute headers and raw data; the attribute data has to be parsed separately. When this is a
// base record, the BaseRecordReference will be zero. When it is an extension record, the BaseRecordReference points to
// the record's base record.
type Record struct {
Signature []byte
FileReference FileReference
BaseRecordReference FileReference
LogFileSequenceNumber uint64
HardLinkCount int
Flags RecordFlag
ActualSize uint32
AllocatedSize uint32
NextAttributeId int
Attributes []Attribute
// ParseRecord parses bytes into a Record after applying fixup. The data is assumed to be in Little Endian order. Only
// the attribute headers are parsed, not the actual attribute data.
func ParseRecord(b []byte) (Record, error) {
if len(b) < 42 {
return Record{}, fmt.Errorf("record data length should be at least 42 but is %d", len(b))
sig := b[:4]
if bytes.Compare(sig, fileSignature) != 0 {
return Record{}, fmt.Errorf("unknown record signature: %# x", sig)
b = binutil.Duplicate(b)
r := binutil.NewLittleEndianReader(b)
baseRecordRef, err := ParseFileReference(r.Read(0x20, 8))
if err != nil {
return Record{}, fmt.Errorf("unable to parse base record reference: %v", err)
firstAttributeOffset := int(r.Uint16(0x14))
if firstAttributeOffset < 0 || firstAttributeOffset >= len(b) {
return Record{}, fmt.Errorf("invalid first attribute offset %d (data length: %d)", firstAttributeOffset, len(b))
updateSequenceOffset := int(r.Uint16(0x04))
updateSequenceSize := int(r.Uint16(0x06))
b, err = applyFixUp(b, updateSequenceOffset, updateSequenceSize)
if err != nil {
return Record{}, fmt.Errorf("unable to apply fixup: %v", err)
attributes, err := ParseAttributes(b[firstAttributeOffset:])
if err != nil {
return Record{}, err
return Record{
Signature: binutil.Duplicate(sig),
FileReference: FileReference{RecordNumber: uint64(r.Uint32(0x2C)), SequenceNumber: r.Uint16(0x10)},
BaseRecordReference: baseRecordRef,
LogFileSequenceNumber: r.Uint64(0x08),
HardLinkCount: int(r.Uint16(0x12)),
Flags: RecordFlag(r.Uint16(0x16)),
ActualSize: r.Uint32(0x18),
AllocatedSize: r.Uint32(0x1C),
NextAttributeId: int(r.Uint16(0x28)),
Attributes: attributes,
}, nil
// fileReferenceRecordMask selects the low 48 bits of an encoded file reference, which hold the record number.
const fileReferenceRecordMask = uint64(0xFFFFFFFFFFFF)

// A FileReference represents a reference to an MFT record. Since the FileReference in a Record is only 4 bytes, the
// RecordNumber will probably not exceed 32 bits.
type FileReference struct {
	RecordNumber   uint64
	SequenceNumber uint16
}

// ToUint64 encodes the reference into the 8-byte layout accepted by ParseFileReference, interpreted as a Little Endian
// integer: the record number occupies the low 48 bits and the sequence number the high 16 bits. Bits of RecordNumber
// above the 48th do not fit in that layout and are dropped.
func (f FileReference) ToUint64() uint64 {
	return uint64(f.SequenceNumber)<<48 | f.RecordNumber&fileReferenceRecordMask
}

// ParseFileReference parses a Little Endian ordered 8-byte slice into a FileReference. The first 6 bytes indicate the
// record number, while the final 2 bytes indicate the sequence number. The record number is treated as an unsigned
// value, so it is zero-extended (never sign-extended) to 64 bits. An error is returned when b is not exactly 8 bytes.
func ParseFileReference(b []byte) (FileReference, error) {
	if len(b) != 8 {
		return FileReference{}, fmt.Errorf("expected 8 bytes but got %d", len(b))
	}

	raw := binary.LittleEndian.Uint64(b)
	return FileReference{
		RecordNumber:   raw & fileReferenceRecordMask,
		SequenceNumber: uint16(raw >> 48),
	}, nil
}
// RecordFlag represents a bit mask flag indicating the status of the MFT record.
type RecordFlag uint16

// Bit values for the RecordFlag. For example, an in-use directory has value 0x0003.
const (
	RecordFlagInUse       RecordFlag = 0x0001
	RecordFlagIsDirectory RecordFlag = 0x0002
	RecordFlagInExtend    RecordFlag = 0x0004
	RecordFlagIsIndex     RecordFlag = 0x0008
)

// Is checks if this RecordFlag's bit mask contains the specified flag. When c combines several bits, all of them have
// to be set for Is to return true. The value receiver allows calling Is on constants and other non-addressable values
// as well as on variables and pointers.
func (f RecordFlag) Is(c RecordFlag) bool {
	return f&c == c
}
// applyFixUp verifies and undoes the update sequence ("fixup") protection of a record. The update sequence starts at
// offset and consists of length 2-byte entries: the update sequence number followed by one replacement value per
// sector. The data is divided into as many equally sized sectors as there are replacement values. The last 2 bytes of
// every sector must equal the update sequence number and are then overwritten with that sector's replacement value.
//
// b is modified in place and returned. An error is returned, without b being modified, when the update sequence does
// not fit inside b, when it holds no replacement values, or when a sector does not end in the update sequence number.
func applyFixUp(b []byte, offset int, length int) ([]byte, error) {
	// One entry for the update sequence number plus at least one replacement value.
	if length < 2 {
		return nil, fmt.Errorf("update sequence size should be at least 2 but is %d", length)
	}
	// Compare against the halved remaining length so that length*2 cannot overflow.
	if offset < 0 || offset > len(b) || length > (len(b)-offset)/2 {
		return nil, fmt.Errorf("update sequence at offset %d with size %d exceeds data length %d", offset, length, len(b))
	}

	updateSequence := b[offset : offset+length*2] // length is in pairs, not bytes
	updateSequenceNumber := updateSequence[:2]
	updateSequenceArray := updateSequence[2:]

	sectorCount := len(updateSequenceArray) / 2
	// The bounds check above guarantees len(b) >= 2*(sectorCount+1), so every sector is at least 2 bytes long.
	sectorSize := len(b) / sectorCount

	// Verify every sector before touching anything, so that a mismatch leaves the data unmodified.
	for i := 1; i <= sectorCount; i++ {
		pos := sectorSize*i - 2
		if !bytes.Equal(updateSequenceNumber, b[pos:pos+2]) {
			return nil, fmt.Errorf("update sequence mismatch at pos %d", pos)
		}
	}

	for i := 0; i < sectorCount; i++ {
		pos := sectorSize*(i+1) - 2
		copy(b[pos:pos+2], updateSequenceArray[i*2:i*2+2])
	}

	return b, nil
}
// FindAttributes returns all attributes of the specified type contained in this record. When no matches are found an
// empty slice is returned.
func (r *Record) FindAttributes(attrType AttributeType) []Attribute {
ret := make([]Attribute, 0)
for _, a := range r.Attributes {
if a.Type == attrType {
ret = append(ret, a)
return ret
// Attribute represents an MFT record attribute header and its corresponding raw attribute Data (excluding header data).
// When the attribute is Resident, the Data contains the actual attribute's data. When the attribute is non-resident,
// the Data contains DataRuns pointing to the actual data. DataRun data can be parsed using ParseDataRuns().
type Attribute struct {
Type AttributeType
Resident bool
Name string
Flags AttributeFlags
AttributeId int
AllocatedSize uint64
ActualSize uint64
Data []byte
// AttributeType represents the type of an Attribute. Use Name() to get the attribute type's name.
type AttributeType uint32

// Known values for AttributeType. Note that other values might occur too.
const (
	AttributeTypeStandardInformation AttributeType = 0x10       // $STANDARD_INFORMATION; always resident
	AttributeTypeAttributeList       AttributeType = 0x20       // $ATTRIBUTE_LIST; mixed residency
	AttributeTypeFileName            AttributeType = 0x30       // $FILE_NAME; always resident
	AttributeTypeObjectId            AttributeType = 0x40       // $OBJECT_ID; always resident
	AttributeTypeSecurityDescriptor  AttributeType = 0x50       // $SECURITY_DESCRIPTOR; always resident?
	AttributeTypeVolumeName          AttributeType = 0x60       // $VOLUME_NAME; always resident?
	AttributeTypeVolumeInformation   AttributeType = 0x70       // $VOLUME_INFORMATION; never resident?
	AttributeTypeData                AttributeType = 0x80       // $DATA; mixed residency
	AttributeTypeIndexRoot           AttributeType = 0x90       // $INDEX_ROOT; always resident
	AttributeTypeIndexAllocation     AttributeType = 0xa0       // $INDEX_ALLOCATION; never resident?
	AttributeTypeBitmap              AttributeType = 0xb0       // $BITMAP; nearly always resident?
	AttributeTypeReparsePoint        AttributeType = 0xc0       // $REPARSE_POINT; always resident?
	AttributeTypeEAInformation       AttributeType = 0xd0       // $EA_INFORMATION; always resident
	AttributeTypeEA                  AttributeType = 0xe0       // $EA; nearly always resident?
	AttributeTypePropertySet         AttributeType = 0xf0       // $PROPERTY_SET
	AttributeTypeLoggedUtilityStream AttributeType = 0x100      // $LOGGED_UTILITY_STREAM; always resident
	AttributeTypeTerminator          AttributeType = 0xFFFFFFFF // Indicates the last attribute in a list; will not actually be returned by ParseAttributes
)
// AttributeFlags represents a bit mask flag indicating various properties of an attribute's data.
type AttributeFlags uint16

// Bit values for the AttributeFlags. For example, an encrypted, compressed attribute has value 0x4001.
const (
	AttributeFlagsCompressed AttributeFlags = 0x0001
	AttributeFlagsEncrypted  AttributeFlags = 0x4000
	AttributeFlagsSparse     AttributeFlags = 0x8000
)

// Is checks if this AttributeFlags's bit mask contains the specified flag. When c combines several bits, all of them
// have to be set for Is to return true. The value receiver allows calling Is on constants and other non-addressable
// values as well as on variables and pointers.
func (f AttributeFlags) Is(c AttributeFlags) bool {
	return f&c == c
}
// ParseAttributes parses bytes into Attributes. The data is assumed to be in Little Endian order. Only the attribute
// headers are parsed, not the actual attribute data.
func ParseAttributes(b []byte) ([]Attribute, error) {
if len(b) == 0 {
return []Attribute{}, nil
attributes := make([]Attribute, 0)
for len(b) > 0 {
if len(b) < 4 {
return nil, fmt.Errorf("attribute header data should be at least 4 bytes but is %d", len(b))
r := binutil.NewLittleEndianReader(b)
attrType := r.Uint32(0)
if attrType == uint32(AttributeTypeTerminator) {
if len(b) < 8 {
return nil, fmt.Errorf("cannot read attribute header record length, data should be at least 8 bytes but is %d", len(b))
uRecordLength := r.Uint32(0x04)
if int64(uRecordLength) > maxInt {
return nil, fmt.Errorf("record length %d overflows maximum int value %d", uRecordLength, maxInt)
recordLength := int(uRecordLength)
if recordLength <= 0 {
return nil, fmt.Errorf("cannot handle attribute with zero or negative record length %d", recordLength)
if recordLength > len(b) {
return nil, fmt.Errorf("attribute record length %d exceeds data length %d", recordLength, len(b))
recordData := r.Read(0, recordLength)
attribute, err := ParseAttribute(recordData)
if err != nil {
return nil, err
attributes = append(attributes, attribute)
b = r.ReadFrom(recordLength)
return attributes, nil
// ParseAttribute parses bytes into an Attribute. The data is assumed to be in Little Endian order. Only the attribute
// headers are parsed, not the actual attribute data.
func ParseAttribute(b []byte) (Attribute, error) {
if len(b) < 22 {
return Attribute{}, fmt.Errorf("attribute data should be at least 22 bytes but is %d", len(b))
r := binutil.NewLittleEndianReader(b)
nameLength := r.Byte(0x09)
nameOffset := r.Uint16(0x0A)
name := ""
if nameLength != 0 {
nameBytes := r.Read(int(nameOffset), int(nameLength)*2)
name = utf16.DecodeString(nameBytes, binary.LittleEndian)
resident := r.Byte(0x08) == 0x00
var attributeData []byte
actualSize := uint64(0)
allocatedSize := uint64(0)
if resident {
dataOffset := int(r.Uint16(0x14))
uDataLength := r.Uint32(0x10)
if int64(uDataLength) > maxInt {
return Attribute{}, fmt.Errorf("attribute data length %d overflows maximum int value %d", uDataLength, maxInt)
dataLength := int(uDataLength)
expectedDataLength := dataOffset + dataLength
if len(b) < expectedDataLength {
return Attribute{}, fmt.Errorf("expected attribute data length to be at least %d but is %d", expectedDataLength, len(b))
attributeData = r.Read(dataOffset, dataLength)
} else {
dataOffset := int(r.Uint16(0x20))
if len(b) < dataOffset {
return Attribute{}, fmt.Errorf("expected attribute data length to be at least %d but is %d", dataOffset, len(b))
allocatedSize = r.Uint64(0x28)
actualSize = r.Uint64(0x30)
attributeData = r.ReadFrom(int(dataOffset))
return Attribute{
Type: AttributeType(r.Uint32(0)),
Resident: resident,
Name: name,
Flags: AttributeFlags(r.Uint16(0x0C)),
AttributeId: int(r.Uint16(0x0E)),
AllocatedSize: allocatedSize,
ActualSize: actualSize,
Data: binutil.Duplicate(attributeData),
}, nil
// A DataRun represents a fragment of data somewhere on a volume. The OffsetCluster, which can be negative, is relative
// to a previous DataRun's offset. The OffsetCluster of the first DataRun in a list is relative to the beginning of the
// volume.
type DataRun struct {
	OffsetCluster    int64  // offset in clusters, relative to the previous DataRun
	LengthInClusters uint64 // length of the fragment in clusters
}
// ParseDataRuns parses bytes into a list of DataRuns. Each DataRun's OffsetCluster is relative to the DataRun before
// it. The first element's OffsetCluster is relative to the beginning of the volume.
func ParseDataRuns(b []byte) ([]DataRun, error) {
if len(b) == 0 {
return []DataRun{}, nil
runs := make([]DataRun, 0)
for len(b) > 0 {
r := binutil.NewLittleEndianReader(b)
header := r.Byte(0)
if header == 0 {
lengthLength := int(header &^ 0xF0)
offsetLength := int(header >> 4)
dataRunDataLength := offsetLength + lengthLength
headerAndDataLength := dataRunDataLength + 1
if len(b) < headerAndDataLength {
return nil, fmt.Errorf("expected at least %d bytes of datarun data but is %d", headerAndDataLength, len(b))
dataRunData := r.Reader(1, dataRunDataLength)
lengthBytes := dataRunData.Read(0, lengthLength)
dataLength := binary.LittleEndian.Uint64(padTo(lengthBytes, 8))
offsetBytes := dataRunData.Read(lengthLength, offsetLength)
dataOffset := int64(binary.LittleEndian.Uint64(padTo(offsetBytes, 8)))
runs = append(runs, DataRun{OffsetCluster: dataOffset, LengthInClusters: dataLength})
b = r.ReadFrom(headerAndDataLength)
return runs, nil
// DataRunsToFragments transform a list of DataRuns with relative offsets and lengths specified in cluster into a list
// of fragment.Fragment elements with absolute offsets and lengths specified in bytes (for example for use in a
// fragment.Reader). Note that data will probably not align to a cluster exactly so there could be some padding at the
// end. It is up to the user of the Fragments to limit reads to actual data size (eg. by using an io.LimitedReader or
// modifying the last element in the list to limit its length).
func DataRunsToFragments(runs []DataRun, bytesPerCluster int) []fragment.Fragment {
frags := make([]fragment.Fragment, len(runs))
previousOffsetCluster := int64(0)
for i, run := range runs {
exactClusterOffset := previousOffsetCluster + run.OffsetCluster
frags[i] = fragment.Fragment{
Offset: exactClusterOffset * int64(bytesPerCluster),
Length: int64(run.LengthInClusters) * int64(bytesPerCluster),
previousOffsetCluster = exactClusterOffset
return frags
// padTo widens a Little Endian two's complement value to length bytes using sign extension: when the most significant
// bit of the last (most significant) byte of data is set, the added bytes are 0xFF, otherwise they are 0x00. Empty
// data yields length zero bytes. When data already has length bytes or more it is returned as is, without copying;
// in every other case a new slice is returned and data is left untouched.
func padTo(data []byte, length int) []byte {
	if len(data) >= length {
		return data
	}

	result := make([]byte, length)
	if len(data) == 0 {
		return result
	}

	copy(result, data)
	if data[len(data)-1]&0x80 != 0 {
		for i := len(data); i < length; i++ {
			result[i] = 0xFF
		}
	}
	return result
}
// Name returns a string representation of the attribute type. For example "$STANDARD_INFORMATION" or "$FILE_NAME". For
// anyte attribute type which is unknown, Name will return "unknown".
func (at AttributeType) Name() string {
switch at {
case AttributeTypeStandardInformation:
case AttributeTypeAttributeList:
case AttributeTypeFileName:
return "$FILE_NAME"
case AttributeTypeObjectId:
return "$OBJECT_ID"
case AttributeTypeSecurityDescriptor:
case AttributeTypeVolumeName:
return "$VOLUME_NAME"
case AttributeTypeVolumeInformation:
case AttributeTypeData:
return "$DATA"
case AttributeTypeIndexRoot:
return "$INDEX_ROOT"
case AttributeTypeIndexAllocation:
case AttributeTypeBitmap:
return "$BITMAP"
case AttributeTypeReparsePoint:
case AttributeTypeEAInformation:
case AttributeTypeEA:
return "$EA"
case AttributeTypePropertySet:
return "$PROPERTY_SET"
case AttributeTypeLoggedUtilityStream:
return "unknown"