-#
-# $Id: robot.tcl,v 1.1 1996/08/06 14:04:22 adam Exp $
+#!/usr/bin/tclsh
+# $Id: robot.tcl,v 1.2 1998/10/15 12:31:03 adam Exp $
#
proc RobotFileNext {area} {
- if {[catch {set ns [glob $area/*]}]} {
+ if {[catch {set ns [glob ${area}/*]}]} {
return {}
}
set off [string first / $area]
set out [RobotFileOpen visited $URL($url,host) $URL($url,path)]
set ti 0
if {[info exists URL($url,line)]} {
- set htmlContent [join $URL($url,line)]
+ set htmlContent [join $URL($url,line) \n]
htmlSwitch $htmlContent \
- title {
+ title {
if {!$ti} {
headSave $url $out $body
set ti 1
}
+ } body {
+ regsub -all -nocase {<script.*</script>} $body {} abody
+ regsub -all {<[^\>]+>} $abody {} nbody
+ puts $out "<body>"
+ puts $out $nbody
+ puts $out "</body>"
} a {
- if {![info exists parm(href)]} continue
+ if {![info exists parm(href)]} {
+ puts "no href"
+ continue
+ }
if {!$ti} {
headSave $url $out "untitled"
set ti 1
set host $URL($url,host)
set path $hpath
}
- if {![regexp {\.dk$} $host]} continue
+ if {![regexp {\.indexdata\.dk$} $host]} continue
} else {
continue
}
set host $URL($url,host)
set method http
} else {
- puts " href=$parm(href)"
set ext [file extension $URL($url,path)]
if {[string compare $ext {}]} {
set dpart [file dirname $URL($url,path)]
}
}
} else {
- set URL($url,state) skip
+ set URL($url,state) html
if {[info exists URL($url,head,Content-type)]} {
if {![string compare $URL($url,head,Content-type) text/html]} {
set URL($url,state) html
return 0
}
-#RobotGetUrl http://www.dtv.dk/ {}
+# Script start-up: make sure the htmlSwitch command is available, validate
+# the command line, and register every site given as an argument.
+#
+# If no command named htmlSwitch is currently defined, load the tclrobot
+# shared library (file name = "tclrobot" + the platform's shared-library
+# extension). The copy in the current directory is tried first; if that
+# load fails, the bare file name is passed to load instead.
+# NOTE(review): presumably htmlSwitch is provided by that library - confirm.
+if {![llength [info commands htmlSwitch]]} {
+ set e [info sharedlibextension]
+ if {[catch {load ./tclrobot$e}]} {
+ load tclrobot$e
+ }
+}
+
+# At least one site argument is required; otherwise print a usage hint and
+# exit with status 1.
+if {![llength $argv]} {
+ puts "Tclrobot: specify one or more sites."
+ exit 1
+}
+# For each site, call RobotFileOpen for the "unvisited" area with path "/"
+# and close the returned channel right away.
+# NOTE(review): presumably this only creates the unvisited entry for the
+# site's root page - verify against RobotFileOpen.
+foreach site $argv {
+ set x [RobotFileOpen unvisited $site /]
+ close $x
+}
RobotRestart
vwait forever