package ip2region_v2 import ( "encoding/binary" "fmt" "os" ) const ( HeaderInfoLength = 256 VectorIndexRows = 256 VectorIndexCols = 256 VectorIndexSize = 8 SegmentIndexBlockSize = 14 ) // --- Index policy define type IndexPolicy int const ( VectorIndexPolicy IndexPolicy = 1 BTreeIndexPolicy IndexPolicy = 2 ) func (i IndexPolicy) String() string { switch i { case VectorIndexPolicy: return "VectorIndex" case BTreeIndexPolicy: return "BtreeIndex" default: return "unknown" } } // --- Header define type Header struct { // data []byte Version uint16 IndexPolicy IndexPolicy CreatedAt uint32 StartIndexPtr uint32 EndIndexPtr uint32 } func NewHeader(input []byte) (*Header, error) { if len(input) < 16 { return nil, fmt.Errorf("invalid input buffer") } return &Header{ Version: binary.LittleEndian.Uint16(input), IndexPolicy: IndexPolicy(binary.LittleEndian.Uint16(input[2:])), CreatedAt: binary.LittleEndian.Uint32(input[4:]), StartIndexPtr: binary.LittleEndian.Uint32(input[8:]), EndIndexPtr: binary.LittleEndian.Uint32(input[12:]), }, nil } // --- searcher implementation type Searcher struct { handle *os.File // header info header *Header ioCount int // use it only when this feature enabled. // Preload the vector index will reduce the number of IO operations // thus speedup the search process vectorIndex []byte // content buffer. // running with the whole xdb file cached contentBuff []byte } func baseNew(dbFile string, vIndex []byte, cBuff []byte) (*Searcher, error) { var err error // content buff first if cBuff != nil { return &Searcher{ vectorIndex: nil, contentBuff: cBuff, }, nil } // open the xdb binary file handle, err := os.OpenFile(dbFile, os.O_RDONLY, 0600) if err != nil { return nil, err } return &Searcher{ handle: handle, vectorIndex: vIndex, }, nil } func NewWithFileOnly(dbFile string) (*Searcher, error) { return baseNew(dbFile, nil, nil) } func NewWithVectorIndex(dbFile string, vIndex []byte) (*Searcher, error) { return baseNew(dbFile, vIndex, nil) } func NewWithBuffer(cBuff []byte) (*Searcher, error) { return baseNew("", nil, cBuff) } func (s *Searcher) Close() { if s.handle != nil { err := s.handle.Close() if err != nil { return } } } // GetIOCount return the global io count for the last search func (s *Searcher) GetIOCount() int { return s.ioCount } // SearchByStr find the region for the specified ip string func (s *Searcher) SearchByStr(str string) (string, error) { ip, err := CheckIP(str) if err != nil { return "", err } return s.Search(ip) } // Search find the region for the specified long ip func (s *Searcher) Search(ip uint32) (string, error) { // reset the global ioCount s.ioCount = 0 // locate the segment index block based on the vector index var il0 = (ip >> 24) & 0xFF var il1 = (ip >> 16) & 0xFF var idx = il0*VectorIndexCols*VectorIndexSize + il1*VectorIndexSize var sPtr, ePtr = uint32(0), uint32(0) if s.vectorIndex != nil { sPtr = binary.LittleEndian.Uint32(s.vectorIndex[idx:]) ePtr = binary.LittleEndian.Uint32(s.vectorIndex[idx+4:]) } else if s.contentBuff != nil { sPtr = binary.LittleEndian.Uint32(s.contentBuff[HeaderInfoLength+idx:]) ePtr = binary.LittleEndian.Uint32(s.contentBuff[HeaderInfoLength+idx+4:]) } else { // read the vector index block var buff = make([]byte, VectorIndexSize) err := s.read(int64(HeaderInfoLength+idx), buff) if err != nil { return "", fmt.Errorf("read vector index block at %d: %w", HeaderInfoLength+idx, err) } sPtr = binary.LittleEndian.Uint32(buff) ePtr = binary.LittleEndian.Uint32(buff[4:]) } // fmt.Printf("sPtr=%d, ePtr=%d", sPtr, ePtr) // binary search the segment index to get the region var dataLen, dataPtr = 0, uint32(0) var buff = make([]byte, SegmentIndexBlockSize) var l, h = 0, int((ePtr - sPtr) / SegmentIndexBlockSize) for l <= h { m := (l + h) >> 1 p := sPtr + uint32(m*SegmentIndexBlockSize) err := s.read(int64(p), buff) if err != nil { return "", fmt.Errorf("read segment index at %d: %w", p, err) } // decode the data step by step to reduce the unnecessary operations sip := binary.LittleEndian.Uint32(buff) if ip < sip { h = m - 1 } else { eip := binary.LittleEndian.Uint32(buff[4:]) if ip > eip { l = m + 1 } else { dataLen = int(binary.LittleEndian.Uint16(buff[8:])) dataPtr = binary.LittleEndian.Uint32(buff[10:]) break } } } //fmt.Printf("dataLen: %d, dataPtr: %d", dataLen, dataPtr) if dataLen == 0 { return "", nil } // load and return the region data var regionBuff = make([]byte, dataLen) err := s.read(int64(dataPtr), regionBuff) if err != nil { return "", fmt.Errorf("read region at %d: %w", dataPtr, err) } return string(regionBuff), nil } // do the data read operation based on the setting. // content buffer first or will read from the file. // this operation will invoke the Seek for file based read. func (s *Searcher) read(offset int64, buff []byte) error { if s.contentBuff != nil { cLen := copy(buff, s.contentBuff[offset:]) if cLen != len(buff) { return fmt.Errorf("incomplete read: readed bytes should be %d", len(buff)) } } else { _, err := s.handle.Seek(offset, 0) if err != nil { return fmt.Errorf("seek to %d: %w", offset, err) } s.ioCount++ rLen, err := s.handle.Read(buff) if err != nil { return fmt.Errorf("handle read: %w", err) } if rLen != len(buff) { return fmt.Errorf("incomplete read: readed bytes should be %d", len(buff)) } } return nil }