Commit 51cfa48
Changed files (5)
lib
spandx
spec
unit
content
lib/spandx/content/text.rb
@@ -5,19 +5,18 @@ module Spandx
class Text
attr_reader :tokens
- def initialize(content, catalogue)
+ def initialize(content)
@content = content
- @stripper = Stripper.new(catalogue)
@tokens = tokenize(content)
end
def similar?(other)
- score = self <=> other
+ score = dice_coefficient(other)
score > 89.0
end
# https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Dice%27s_coefficient#Ruby
- def <=>(other)
+ def dice_coefficient(other)
overlap = (tokens & other.tokens).size
total = tokens.size + other.tokens.size
100.0 * (overlap * 2.0 / total)
@@ -25,24 +24,15 @@ module Spandx
private
- attr_reader :content, :stripper
+ attr_reader :content
def tokenize(content)
- content = canonicalize(content)
- content = stripper.strip(content)
- content.downcase.scan(/(?:\w(?:'s|(?<=s)')?)+/).to_set
+ canonicalize(content).scan(/(?:\w(?:'s|(?<=s)')?)+/).to_set
end
def canonicalize(content)
- NORMALIZATIONS.each do |key, hash|
- content = content.gsub(hash[:from], hash[:to])
- end
- content
- .gsub(/\b#{Regexp.union(WORDS.keys)}\b/, WORDS)
- .gsub(REGEXES[:bullet], "\n\n* ")
- .gsub(/\)\s+\(/, ')(')
+ content.downcase
end
-
end
end
end
lib/spandx/catalogue.rb
@@ -45,7 +45,7 @@ module Spandx
end
def map_from(license_hash)
- License.new(license_hash, self)
+ License.new(license_hash)
end
def present?(item)
lib/spandx/guess.rb
@@ -10,6 +10,14 @@ module Spandx
@item = item
end
+ def >(other)
+ score > other.score
+ end
+
+ def <(other)
+ score < other.score
+ end
+
def <=>(other)
score <=> other.score
end
@@ -26,12 +34,14 @@ module Spandx
end
def license_for(content)
- this = Content::Text.new(content, catalogue)
- catalogue
- .map { |x| Score.new(this.similar?(x.content), x) }
- .max
- .item
- .id
+ this = Content::Text.new(content)
+
+ max_score = catalogue.map do |license|
+ percentage = this.dice_coefficient(license.content)
+ Score.new(percentage, license)
+ end.max
+
+ max_score.item.id
end
end
end
lib/spandx/license.rb
@@ -14,11 +14,10 @@ module Spandx
end
end
- attr_reader :attributes, :catalogue
+ attr_reader :attributes
- def initialize(attributes = {}, catalogue)
+ def initialize(attributes = {})
@attributes = attributes
- @catalogue = catalogue
end
def id
@@ -74,7 +73,7 @@ module Spandx
end
def content
- @content ||= Content::Text.new(details.text, catalogue)
+ @content ||= Content::Text.new(details.text)
end
def details
spec/unit/content/text_spec.rb
@@ -1,9 +1,8 @@
# frozen_string_literal: true
RSpec.describe Spandx::Content::Text do
- subject { described_class.new(content, catalogue) }
+ subject { described_class.new(content) }
- let(:catalogue) { Spandx::Catalogue.from_file(fixture_file('spdx.json')) }
let(:content) do
license_file('MIT')
.gsub('<year>', Time.now.year.to_s)
@@ -11,9 +10,11 @@ RSpec.describe Spandx::Content::Text do
end
describe "#similar?" do
- let(:mit) { described_class.new(license_file('MIT'), catalogue) }
+ let(:mit) { described_class.new(license_file('MIT')) }
+ let(:lgpl) { described_class.new(license_file('LGPL-2.0')) }
specify { expect(subject.similar?(mit)).to be(true) }
+ specify { expect(subject.similar?(lgpl)).to be(false) }
specify { expect(subject.similar?(subject)).to be(true) }
end
end