-- Split a string into tokens.
-- A token is a maximal run of characters other than the ASCII space. Tabs and
-- other whitespace are not separators and remain part of the tokens.
-- @param str  the string to split
-- @return     array (1..n) of tokens; empty when str contains no token
function split (str)
  local tokens = {}
  local i = 0
  for token in string.gmatch(str, "[^ ]+") do
    i = i + 1
    tokens[i] = token
  end
  return tokens
end

-- Read bags of words from a file, one bag per line.
-- @param path  path of the text file to read
-- @return      array holding, for each line of the file, its token array
--              (as produced by split)
-- Raises an error carrying io.open's message when the file cannot be opened.
function read_words (path)
  -- io.open returns nil plus a message on failure; assert turns that into a
  -- readable error instead of an "attempt to index a nil value" further down.
  local f = assert(io.open(path, "r"))
  local data = {}
  local i = 0
  for line in f:lines() do
    i = i + 1
    data[i] = split(line)
  end
  f:close()
  return data
end

-- Intersect two sets.
-- A set is a table whose keys are its members.
-- @param s1  first set (the one that is iterated)
-- @param s2  second set
-- @return    new table holding every key of s1 that is also present in s2,
--            mapped to the value stored in s2; neither input is modified
function intersect (s1, s2)
  local s = {}
  for k in pairs(s1) do
    -- Assigning nil (key absent from s2) adds nothing to s.
    s[k] = s2[k]
  end
  return s
end

-- For each topic, find documents containing all the words from that topic.
-- @param documents  array of word arrays, one per document
-- @param topics     array of word arrays, one per topic
-- @return           array parallel to topics; entry i is the set (index -> true)
--                   of documents that contain every word of topic i. A topic
--                   with no words yields an empty set.
function match (documents, topics)
  -- Inverted index: word -> set of indices of the documents containing it.
  local word_docs = {}
  for i, d in ipairs(documents) do
    for _, w in ipairs(d) do
      local docs = word_docs[w]
      if docs then
        docs[i] = true
      else
        word_docs[w] = {[i] = true}
      end
    end
  end

  local rslt = {}
  for i, t in ipairs(topics) do
    -- Start from an empty set rather than nil: a nil entry would leave a hole
    -- in rslt, and ipairs over the result would stop at that hole.
    local s = {}
    for j, w in ipairs(t) do
      -- A word that occurs in no document contributes an empty set.
      local docs = word_docs[w] or {}
      if j == 1 then
        s = docs
      else
        s = intersect(s, docs)
      end
    end
    rslt[i] = s
  end
  return rslt
end

-- Compute the size of a table.
-- @param tbl  any table
-- @return     number of key/value pairs visited by pairs(tbl)
function size (tbl)
  local s = 0
  for _ in pairs(tbl) do
    s = s + 1
  end
  return s
end

-- Load and match
local documents = read_words("documents.txt")
local topics = read_words("topics.txt")
-- os.clock reports CPU time with sub-second precision; os.time only has
-- one-second resolution, which makes short runs print as 0.
local timer = os.clock()
local rslt = match(documents, topics)
print(string.format("Elapsed time: %.3f s", os.clock() - timer))

-- Print a few summary statistics
local s1 = 0 -- number of topics matched by at least one document
local s2 = 0 -- largest number of documents matched by a single topic
for _, d in ipairs(rslt) do
  local n = size(d)
  if n > 0 then
    s1 = s1 + 1
  end
  s2 = math.max(s2, n)
end
print(s1, s2)