{"id":1326,"date":"2014-04-23T17:00:19","date_gmt":"2014-04-24T00:00:19","guid":{"rendered":"http:\/\/wp.colliertech.org\/cj\/?p=1326"},"modified":"2014-04-24T18:30:46","modified_gmt":"2014-04-25T01:30:46","slug":"really-large-nlp-corpora","status":"publish","type":"post","link":"https:\/\/wp.c9h.org\/cj\/?p=1326","title":{"rendered":"Really large NLP corpora"},"content":{"rendered":"<p>Jeeze people.  You&#8217;re all noisy.  I&#8217;m sure it was all done for posterity&#8217;s sake.<\/p>\n<pre>\r\n23M     irclogs\/MagNET\/#perl.log\r\n29M     irclogs\/freenode\/#mysql.log\r\n36M     irclogs\/freenode\/#debian.log\r\n37M     irclogs\/foonetic\/#xkcd.log\r\n39M     irclogs\/OFTC\/#debian.log\r\n43M     irclogs\/freenode\/#jquery.log\r\n44M     irclogs\/freenode\/#perl.log\r\n\r\n$ for file in irclogs\/MagNET\/#perl.log irclogs\/freenode\/#mysql.log irclogs\/freenode\/#debian.log irclogs\/foonetic\/#xkcd.log irclogs\/OFTC\/#debian.log irclogs\/freenode\/#jquery.log irclogs\/freenode\/#perl.log; do echo -n \"$file: \" ; head -1 $file ; done\r\nirclogs\/MagNET\/#perl.log: --- Log opened Thu May 26 08:31:32 2011\r\nirclogs\/freenode\/#mysql.log: --- Log opened Wed Dec 28 09:03:49 2011\r\nirclogs\/freenode\/#debian.log: --- Log opened Tue Mar 12 12:52:40 2013\r\nirclogs\/foonetic\/#xkcd.log: --- Log opened Wed Dec 28 19:33:43 2011\r\nirclogs\/OFTC\/#debian.log: --- Log opened Tue Jul 12 19:25:48 2011\r\nirclogs\/freenode\/#jquery.log: --- Log opened Tue Jan 31 16:47:51 2012\r\nirclogs\/freenode\/#perl.log: --- Log opened Thu Dec 15 09:31:47 2011\r\n<\/pre>\n\n<div class=\"twitter-share\"><a href=\"https:\/\/twitter.com\/intent\/tweet?via=cjamescollier\" class=\"twitter-share-button\">Tweet<\/a><\/div>\n","protected":false},"excerpt":{"rendered":"<p>Jeeze people. You&#8217;re all noisy. I&#8217;m sure it was all done for posterity&#8217;s sake. 23M irclogs\/MagNET\/#perl.log 29M irclogs\/freenode\/#mysql.log 36M irclogs\/freenode\/#debian.log 37M irclogs\/foonetic\/#xkcd.log 39M irclogs\/OFTC\/#debian.log 43M irclogs\/freenode\/#jquery.log 44M irclogs\/freenode\/#perl.log $ for file in irclogs\/MagNET\/#perl.log irclogs\/freenode\/#mysql.log irclogs\/freenode\/#debian.log irclogs\/foonetic\/#xkcd.log irclogs\/OFTC\/#debian.log irclogs\/freenode\/#jquery.log irclogs\/freenode\/#perl.log; do echo -n &#8220;$file: &#8221; ; head -1 $file ; done irclogs\/MagNET\/#perl.log: &#8212; Log opened Thu [&hellip;]<\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"jetpack_post_was_ever_published":false,"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[17,103,181,204,116,12,216,18],"tags":[],"class_list":["post-1326","post","type-post","status-publish","format-standard","hentry","category-debian","category-freenode","category-irc","category-javascript","category-language","category-mysql","category-natural-language-processing","category-perl"],"jetpack_featured_media_url":"","jetpack_shortlink":"https:\/\/wp.me\/p1YDIB-lo","jetpack_sharing_enabled":true,"jetpack_likes_enabled":true,"_links":{"self":[{"href":"https:\/\/wp.c9h.org\/cj\/index.php?rest_route=\/wp\/v2\/posts\/1326","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/wp.c9h.org\/cj\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/wp.c9h.org\/cj\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/wp.c9h.org\/cj\/index.php?rest_route=\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/wp.c9h.org\/cj\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1326"}],"version-history":[{"count":2,"href":"https:\/\/wp.c9h.org\/cj\/index.php?rest_route=\/wp\/v2\/posts\/1326\/revisions"}],"predecessor-version":[{"id":1328,"href":"https:\/\/wp.c9h.org\/cj\/index.php?rest_route=\/wp\/v2\/posts\/1326\/revisions\/1328"}],"wp:attachment":[{"href":"https:\/\/wp.c9h.org\/cj\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1326"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/wp.c9h.org\/cj\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1326"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/wp.c9h.org\/cj\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1326"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}