Commit 81f8960

mo khan <mo.khan@gmail.com>
2020-05-15 18:13:34
Load entries in index on demand
1 parent 1d7e7d7
lib/spandx/core/cache.rb
@@ -34,6 +34,11 @@ module Spandx
         datafile_for(name).insert(name, version, licenses)
       end
 
+      # Inserts an entry via #insert (same arguments), then calls rebuild_index.
+      # Returns whatever rebuild_index returns.
+      # NOTE(review): presumably the rebuild makes the new entry visible to
+      # index-backed lookups right away — confirm against rebuild_index.
+      def insert!(*args)
+        insert(*args)
+        rebuild_index
+      end
+
       def datafile_for(name)
         datafiles.fetch(key_for(name))
       end
lib/spandx/core/data_file.rb
@@ -3,6 +3,8 @@
 module Spandx
   module Core
     class DataFile
+      include Enumerable
+
       attr_reader :absolute_path
 
       def initialize(absolute_path)
@@ -13,9 +15,9 @@ module Spandx
       def each
         return unless exist?
 
-        index.scan do |table|
-          table.each do |row|
-            yield row
+        open_file(mode: 'rb') do |io|
+          while (line = io.gets)
+            yield CsvParser.parse(line)
           end
         end
       end
lib/spandx/core/git.rb
@@ -33,7 +33,7 @@ module Spandx
       end
 
+      # Quietly clones url into root, fetching only the tip commit (depth 1) of
+      # the master branch. Returns whatever `system` returns.
       def clone!
-        system('git', 'clone', '--quiet', url, root)
+        system('git', 'clone', '--quiet', '--depth=1', '--single-branch', '--branch', 'master', url, root)
       end
 
       def pull!
lib/spandx/core/index_file.rb
@@ -11,12 +11,13 @@ module Spandx
       def initialize(data_file)
         @data_file = data_file
         @path = Pathname.new("#{data_file.absolute_path}.idx")
-        @entries = {}
+        # One cache slot per index entry; slots start as nil and are filled on
+        # demand by position_for. Array.new(0) is already [], so an empty or
+        # missing index needs no special case.
+        @entries = Array.new(size)
       end
 
       def each
-        data.each do |position|
-          yield position
+        total = path.size / UINT_32_SIZE
+        total.times do |n|
+          yield position_for(n)
         end
       end
 
@@ -43,21 +44,19 @@ module Spandx
       end
 
+      # Number of entries in the index file: its size in bytes divided by
+      # UINT_32_SIZE, or 0 when the index file does not exist.
       def size
-         path.exist? ? path.size / UINT_32_SIZE : (data&.size || 0)
+        path.exist? ? path.size / UINT_32_SIZE : 0
       end
 
       def position_for(row_number)
-        data.fetch(row_number)
+        return if row_number > size
 
-        # @entries.fetch(row_number) do |key|
-        # offset = row_number * 2
-        # @entries[key] = IO.read(path, 2, offset, mode: 'rb').unpack1('v')
+        entry = entries[row_number]
+        return entry if entry
 
-        # #@entries[key] = File.open(path, mode: 'rb') do |io|
-        # #io.seek(row_number * 2)
-        # #io.read(2).unpack1('v')
-        # #end
-        # end
+        bytes = IO.binread(path, UINT_32_SIZE, offset_for(row_number))
+        entry = bytes.unpack1(UINT_32_DIRECTIVE)
+        entries[row_number] = entry
+        entry
       end
 
       def scan
@@ -75,8 +74,10 @@ module Spandx
 
       private
 
-      def data
-        @data ||= load
+      # Per-row cache of positions already read from the index file.
+      attr_reader :entries
+
+      # Byte offset of the entry for row_number within the index file; entries
+      # are laid out back to back, UINT_32_SIZE bytes each.
+      def offset_for(row_number)
+        row_number * UINT_32_SIZE
       end
 
       def sort(data_file)
@@ -93,26 +94,6 @@ module Spandx
         end
       end
 
-      def load
-        return build_index_from_data_file unless path.exist?
-
-        [].tap do |items|
-          each_index do |position|
-            items << position
-          end
-        end
-      end
-
-      def build_index_from_data_file
-        data_file.open_file { |io| lines_in(io) }
-      end
-
-      def each_index
-        File.open(path, mode: 'rb') do |io|
-          yield io.read(UINT_32_SIZE).unpack1(UINT_32_DIRECTIVE) until io.eof?
-        end
-      end
-
       def lines_in(io)
         lines = [0]
         io.seek(0)
lib/spandx/core/relation.rb
@@ -21,7 +21,9 @@ module Spandx
       end
 
+      # Reads and parses the row with the given number. Row 0 is read from
+      # offset 0 without consulting the index; any other row seeks to
+      # index.position_for(number). Returns nil when the index has no position
+      # for the row.
       def row(number)
-        offset = index.position_for(number)
+        offset = number.zero? ? 0 : index.position_for(number)
+        return unless offset
+
         io.seek(offset)
         parse_row(io.gets)
       end
spec/integration/core/cache_spec.rb
@@ -31,7 +31,7 @@ RSpec.describe Spandx::Core::Cache do
   include_examples 'each data file', 'rubygems', :rubygems
   include_examples 'each data file', 'nuget', :cache
 
-  describe '#insert' do
+  describe '#insert!' do
     subject { described_class.new('rubygems', root: root_dir) }
 
     let(:root_dir) { Dir.mktmpdir }
@@ -45,7 +45,7 @@ RSpec.describe Spandx::Core::Cache do
       let(:version) { "#{rand(10)}.#{rand(10)}.#{rand(10)}" }
 
       before do
-        subject.insert(dependency_name, version, ['MIT'])
+        subject.insert!(dependency_name, version, ['MIT'])
       end
 
       specify { expect(subject.licenses_for(dependency_name, version)).to match_array(['MIT']) }
@@ -53,32 +53,32 @@ RSpec.describe Spandx::Core::Cache do
 
     context 'when attempting to insert invalid entries' do
       specify do
-        subject.insert(nil, '1.1.1', ['MIT'])
+        subject.insert!(nil, '1.1.1', ['MIT'])
         expect(subject.licenses_for(nil, '1.1.1')).to be_empty
       end
 
       specify do
-        subject.insert('', '1.1.1', ['MIT'])
+        subject.insert!('', '1.1.1', ['MIT'])
         expect(subject.licenses_for('', '1.1.1')).to be_empty
       end
 
       specify do
-        subject.insert('spandx', nil, ['MIT'])
+        subject.insert!('spandx', nil, ['MIT'])
         expect(subject.licenses_for('spandx', nil)).to be_empty
       end
 
       specify do
-        subject.insert('spandx', nil, ['MIT'])
+        subject.insert!('spandx', nil, ['MIT'])
         expect(File.exist?(File.join(root_dir, 'cf', subject.package_manager))).to be(false)
       end
 
       specify do
-        subject.insert('spandx', '', ['MIT'])
+        subject.insert!('spandx', '', ['MIT'])
         expect(subject.licenses_for('spandx', '')).to be_empty
       end
 
       specify do
-        subject.insert('spandx', '', ['MIT'])
+        subject.insert!('spandx', '', ['MIT'])
         expect(File.exist?(File.join(root_dir, 'cf', subject.package_manager))).to be(false)
       end
     end
@@ -141,7 +141,7 @@ RSpec.describe Spandx::Core::Cache do
       let(:root_dir) { Dir.mktmpdir }
 
       before do
-        subject.insert('spandx', '0.0.0', ['MIT'])
+        subject.insert!('spandx', '0.0.0', ['MIT'])
       end
 
       after do
@@ -172,12 +172,6 @@ RSpec.describe Spandx::Core::Cache do
         expect { subject.take(100_000).count }.to perform_under(0.1).sample(10)
       end
 
-      xit 'profiles each' do
-        with_profiler do
-          subject.take(100_000).count
-        end
-      end
-
       xit 'profiles each option' do
         require 'fastest-csv'
 
spec/integration/core/git_spec.rb
@@ -30,7 +30,7 @@ RSpec.describe Spandx::Core::Git do
         subject.update!
       end
 
-      specify { expect(shell).to have_received(:system).with('git', 'clone', '--quiet', url, expected_path) }
+      specify { expect(shell).to have_received(:system).with('git', 'clone', '--quiet', '--depth=1', '--single-branch', '--branch', 'master', url, expected_path) }
     end
 
     context 'when the repository has already been cloned' do
spec/unit/core/index_file_spec.rb
@@ -40,7 +40,7 @@ RSpec.describe Spandx::Core::IndexFile do
 
   describe '#update!' do
     let(:data_file) { Spandx::Core::DataFile.new(path) }
-    let(:path) { File.expand_path(File.join(Dir.home, '.local', 'share', 'spandx-rubygems', '.index', '00', 'rubygems')) }
+    let(:path) { File.expand_path(File.join(Dir.home, '.local', 'share', 'spandx', 'cache', '.index', '00', 'nuget')) }
 
     before do
       subject.update!
@@ -51,5 +51,7 @@ RSpec.describe Spandx::Core::IndexFile do
         expect(item).not_to be_nil
       end
     end
+
+    specify { expect(data_file.count).to be > 1_000 }
   end
 end