(load "~/.sbclrc") (require 'split-sequence) (defpackage :match (:use :common-lisp :split-sequence)) (in-package :match) (defun read-words (path) "Read bags of words from a file" (with-open-file (stream path) (loop for line = (read-line stream nil) while line collect (split-sequence #\Space (coerce line 'simple-base-string))))) (defun lookup-table (documents) "Constructs a word -> documents lookup table" (let ((word-docs (make-hash-table :test 'equal))) (loop for d in documents for i upfrom 0 do (loop for w in d do (push i (gethash w word-docs)))) word-docs)) (defun intersect-sorted (list1 list2 predicate) "Find an intersection of two sorted lists" (do (list (tail1 list1) (tail2 list2)) ((or (null tail1) (null tail2)) (nreverse list)) (let ((head1 (car tail1)) (head2 (car tail2))) (cond ((funcall predicate head1 head2) (setq tail1 (cdr tail1))) ((funcall predicate head2 head1) (setq tail2 (cdr tail2))) (t (if (or (null list) (not (= head1 (car list)))) (push head1 list)) (setq tail1 (cdr tail1)) (setq tail2 (cdr tail2))))))) (defun match (documents topics) "For each topic, find documents containing all the words from that topic" (let ((word-docs (lookup-table documents))) ;; Strictly speaking, sorting is not required in single-threaded code (maphash (lambda (k v) (setf (gethash k word-docs) (sort v '<))) word-docs) (loop for topic in topics collect (reduce (lambda (x y) (intersect-sorted x y '<)) (mapcar (lambda (w) (gethash w word-docs)) topic))))) (defun main () (let* ((topics (read-words "topics.txt")) (documents (read-words "documents.txt")) (rslt (time (match documents topics)))) ;; Print a few summary statistics (format t "~a~%" (count-if (lambda (d) (> (length d) 0)) rslt)) (format t "~a~%" (reduce 'max rslt :key 'length)))) ;; Run benchmark (main)