+#!/usr/bin/tclsh
+# $Id: dcdot.tcl,v 1.1 2000/12/07 20:16:11 adam Exp $
+#
+
+proc RobotRestart {} {
+ global robotMoreWork
+
+ set robotMoreWork 0
+}
+
+proc RobotTextHtml {url} {
+ global URL
+
+ set head 0
+ htmlSwitch $URL($url,buf) \
+ title {
+ set URL($url,title) $body
+ } -nonest meta {
+ set scheme {}
+ if {[info exist parm(scheme)]} {
+ set scheme $parm(scheme)
+ unset parm(scheme)
+ }
+ if {[info exist parm(name)]} {
+ if {[info exist parm(content)]} {
+ set URL($url,meta,$parm(name),$scheme) $parm(content)
+ unset parm(content)
+ }
+ unset parm(name)
+ }
+ } a {
+ if {[info exists parm(href)]} {
+ lappend URL($url,links) $parm(href)
+ }
+ }
+}
+
+proc Robot200 {url} {
+ global URL domains
+
+ # puts "Parsing $url"
+ switch $URL($url,head,content-type) {
+ text/html {
+ RobotTextHtml $url
+ }
+ }
+ # puts "Parsing done"
+}
+
+proc RobotReadContent {url sock} {
+ global URL
+
+ set buffer [read $sock 16384]
+ set readCount [string length $buffer]
+
+ if {$readCount <= 0} {
+ close $sock
+ Robot200 $url
+ RobotRestart
+ } else {
+ # puts "Got $readCount bytes"
+ set URL($url,buf) $URL($url,buf)$buffer
+ }
+}
+
+proc RobotReadHeader {url sock} {
+ global URL
+
+ set buffer [read $sock 2148]
+ set readCount [string length $buffer]
+
+ if {$readCount <= 0} {
+ close $sock
+ RobotRestart
+ } else {
+ # puts "Got $readCount bytes"
+ set URL($url,buf) $URL($url,buf)$buffer
+
+ set n [string first \n\n $URL($url,buf)]
+ if {$n > 1} {
+ set code 0
+ set version {}
+ set headbuf [string range $URL($url,buf) 0 $n]
+ incr n
+ incr n
+ set URL($url,buf) [string range $URL($url,buf) $n end]
+
+ regexp {^HTTP/([0-9.]+)[ ]+([0-9]+)} $headbuf x version code
+ set lines [split $headbuf \n]
+ foreach line $lines {
+ if {[regexp {^([^:]+):[ ]+(.*)} $line x name value]} {
+ set URL($url,head,[string tolower $name]) $value
+ }
+ }
+ set URL($url,state) skip
+ switch $code {
+ 200 {
+ if {![info exists URL($url,head,content-type)]} {
+ set URL($url,head,content-type) {}
+ }
+ switch $URL($url,head,content-type) {
+ text/html {
+ fileevent $sock readable [list RobotReadContent $url $sock]
+ }
+ text/plain {
+ fileevent $sock readable [list RobotReadContent $url $sock]
+ }
+ default {
+ close $sock
+ Robot200 $url
+ RobotRestart
+ }
+ }
+ }
+ default {
+ Robot404 $url
+ close $sock
+ RobotRestart
+ }
+ }
+ }
+ }
+}
+
+proc RobotConnect {url sock} {
+ global URL agent
+
+ fconfigure $sock -translation {auto crlf} -blocking 0
+ fileevent $sock readable [list RobotReadHeader $url $sock]
+ puts $sock "GET $URL($url,path) HTTP/1.0"
+ puts $sock "Host: $URL($url,host)"
+ puts $sock "User-Agent: $agent"
+ puts $sock ""
+ flush $sock
+}
+
+proc RobotGetUrl {url phost} {
+ global URL
+ if {![regexp {([^:]+)://([^/]+)([^ ]*)} $url x method hostport path]} {
+ return -1
+ }
+ if {![regexp {([^:]+):([0-9]+)} $hostport x host port]} {
+ set port 80
+ set host $hostport
+ }
+ set URL($url,method) $method
+ set URL($url,host) $host
+ set URL($url,port) $port
+ set URL($url,path) $path
+ set URL($url,state) head
+ set URL($url,buf) {}
+ if [catch {set sock [socket -async $host $port]}] {
+ return -1
+ }
+ RobotConnect $url $sock
+
+ return 0
+}
+
+if {![llength [info commands htmlSwitch]]} {
+ set e [info sharedlibextension]
+ if {[catch {load ./tclrobot$e}]} {
+ load tclrobot$e
+ }
+}
+
+set agent "zmbot/0.0"
+if {![catch {set os [exec uname -s -r]}]} {
+ set agent "$agent ($os)"
+}
+
+proc RobotGetDCDOT {url} {
+ global robotMoreWork 1
+
+ set robotMoreWork 1
+ if [RobotGetUrl $url {}] {
+ set robotMoreWork 0
+ }
+
+ while {$robotMoreWork} {
+ vwait robotMoreWork
+ }
+}
+
+if {$argc == 1} {
+ set url [lindex $argv 0]
+ RobotGetDCDOT $url
+ set mask {,meta,[Dd][Cc]\.*}
+ foreach a [array names URL $url$mask] {
+ puts "URL($a) = $URL($a)"
+ }
+}
\ No newline at end of file