[4@sddZddlZddlZdgZGdddZGdddZGdddZdS) a% robotparser.py Copyright (C) 2000 Bastian Kleineidam You can choose between two licenses when using this package: 1) GNU GPLv2 2) PSF license for Python 2.2 The robots.txt Exclusion Protocol is implemented as specified in http://www.robotstxt.org/norobots-rfc.txt NRobotFileParserc@seZdZdZdddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ dS)rzs This class provides a set of methods to read, parse and answer questions about a single robots.txt file. cCs>g|_d|_d|_d|_|j|d|_dS)NFr)entries default_entry disallow_all allow_allset_url last_checked)selfurlr (/usr/lib/python3.4/urllib/robotparser.py__init__s      zRobotFileParser.__init__cCs|jS)zReturns the time the robots.txt file was last fetched. This is useful for long-running web spiders that need to check for new robots.txt files periodically. )r )r r r r mtimeszRobotFileParser.mtimecCsddl}|j|_dS)zYSets the time the robots.txt file was last fetched to the current time. rN)timer )r rr r r modified(s zRobotFileParser.modifiedcCs5||_tjj|dd\|_|_dS)z,Sets the URL referring to a robots.txt file.N)r urllibparseurlparseZhostpath)r r r r r r0s zRobotFileParser.set_urlcCsytjj|j}Wnptjjk r}zJ|jdkrOd|_n*|jdkry|jdkryd|_nWYdd}~Xn)X|j }|j |j dj dS) z4Reads the robots.txt URL and feeds it to the parser.TiiNzutf-8)rr) rZrequestZurlopenr errorZ HTTPErrorcoderrreadrdecode splitlines)r ferrrawr r r r5s  zRobotFileParser.readcCsAd|jkr-|jdkr=||_q=n|jj|dS)N*) useragentsrrappend)r entryr r r _add_entryBszRobotFileParser._add_entrycCsd}t}|jx|D]}|sx|dkrJt}d}qx|dkrx|j|t}d}qxn|jd}|dkr|d|}n|j}|sq n|jdd}t|dkr |djj|ds z+RobotFileParser.__str__..)joinr)r r r r __str__szRobotFileParser.__str__N) __name__ __module__ __qualname____doc__rrrrrr&rr=rCr r r r rs    4 c@s:eZdZdZddZddZddZdS) r2zoA rule line is a single "Allow:" (allowance==True) or "Disallow:" (allowance==False) followed by a path.cCs\|dkr| rd}ntjjtjj|}tjj||_||_dS)NrT)rrr8rr9rr;)r rr;r r r rs  zRuleLine.__init__cCs|jdkp|j|jS)Nr")r startswith)r filenamer r r r:szRuleLine.applies_tocCs|jrdpdd|jS)NZAllowZDisallowz: )r;r)r r r r rCszRuleLine.__str__N)rDrErFrGrr:rCr r r r r2s   r2c@sFeZdZdZddZddZddZdd Zd S) r*z?An entry has one or more user-agents and zero or more rulelinescCsg|_g|_dS)N)r#r1)r r r r rs zEntry.__init__cCsjg}x'|jD]}|jd|dgqWx*|jD]}|jt|dgq:Wdj|S)Nz User-agent: r>r)r#extendr1r?rB)r Zretagentr5r r r rCs z Entry.__str__cCs]|jddj}x=|jD]2}|dkr9dS|j}||kr#dSq#WdS)z2check if this entry applies to the specified agentr7rr"TF)r-r/r#)r r<rKr r r r:s   zEntry.applies_tocCs.x'|jD]}|j|r |jSq WdS)zZPreconditions: - our agent applies to this entry - filename is URL decodedT)r1r:r;)r rIr5r r r r;s zEntry.allowanceN)rDrErFrGrrCr:r;r r r r r*s    r*)rGZ urllib.parserZurllib.request__all__rr2r*r r r r  s