Skip to content

Commit 50e9f72

Browse files
committed
spidy Web Crawler Release v1.0
1 parent c5cc42e commit 50e9f72

File tree

3 files changed

+44
-53
lines changed

3 files changed

+44
-53
lines changed

TODO.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@ spidy Web Crawler - TODO List
22

33
- Automatic bug testing - Travis CI?
44
- Windows, MacOS, Linux versions
5-
- Automatic file/folder creation
6-
- Completely self-contained; start with ONLY crawler.py
75
- Working GUI for lulz
86
- Multiple HTTP threads?
97
- Respecting robots.txt

crawler.py

Lines changed: 44 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,16 @@ def get_full_time():
2424
START_TIME_LONG = get_time()
2525

2626
# Get current working directory of spidy
27-
from os import path
27+
from os import path, makedirs
2828

2929
CRAWLER_DIR = path.dirname(path.realpath(__file__))
3030

3131
# Open log file for logging
32+
try:
33+
makedirs('logs\\') # Attempts to make the logs directory
34+
except OSError:
35+
pass # Assumes only OSError will complain if logs/ already exists
36+
3237
LOG_FILE = open('{0}\\logs\\spidy_log_{1}.txt'.format(CRAWLER_DIR, START_TIME), 'w+')
3338
LOG_FILE_NAME = 'logs\\spidy_log_{0}'.format(START_TIME)
3439

@@ -49,7 +54,6 @@ def write_log(message):
4954
import requests
5055
import shutil
5156
from lxml import html, etree
52-
from os import makedirs
5357
from winsound import Beep
5458

5559

@@ -486,16 +490,20 @@ def init():
486490

487491
# Getting Arguments
488492

489-
write_log('[INIT]: Should spidy load settings from an available config file? (y/n):')
490-
input_ = input()
491-
if not bool(input_):
492-
USE_CONFIG = False
493-
elif input_ in yes:
494-
USE_CONFIG = True
495-
elif input_ in no:
493+
if not path.exists('config\\'):
494+
write_log('[INFO]: No config folder available.')
496495
USE_CONFIG = False
497496
else:
498-
handle_invalid_input()
497+
write_log('[INIT]: Should spidy load settings from an available config file? (y/n):')
498+
input_ = input()
499+
if not bool(input_):
500+
USE_CONFIG = False
501+
elif input_ in yes:
502+
USE_CONFIG = True
503+
elif input_ in no:
504+
USE_CONFIG = False
505+
else:
506+
handle_invalid_input()
499507

500508
if USE_CONFIG:
501509
try:
@@ -677,13 +685,19 @@ def init():
677685
else:
678686
write_log('[INIT]: Loading save files...')
679687
# Import saved TODO file data
680-
with open(TODO_FILE, 'r') as f:
681-
contents = f.readlines()
688+
try:
689+
with open(TODO_FILE, 'r') as f:
690+
contents = f.readlines()
691+
except FileNotFoundError: # If no TODO file is present
692+
contents = []
682693
for line in contents:
683694
TODO.append(line.strip())
684695
# Import saved done file data
685-
with open(DONE_FILE, 'r') as f:
686-
contents = f.readlines()
696+
try:
697+
with open(DONE_FILE, 'r') as f:
698+
contents = f.readlines()
699+
except FileNotFoundError: # If no DONE file is present
700+
contents = []
687701
for line in contents:
688702
DONE.append(line.strip())
689703
del contents
@@ -695,10 +709,8 @@ def init():
695709

696710
def main():
697711
"""
698-
The main function or spidy.
712+
The main function of spidy.
699713
"""
700-
init()
701-
702714
# Declare global variables
703715
global VERSION, START_TIME, START_TIME_LONG
704716
global LOG_FILE, LOG_FILE_NAME, ERR_LOG_FILE_NAME
@@ -709,6 +721,21 @@ def main():
709721
global TODO_FILE, DONE_FILE, ERR_LOG_FILE, WORD_FILE, BAD_FILE
710722
global WORDS, TODO, DONE
711723

724+
init()
725+
726+
# Create required saved/ folder
727+
try:
728+
makedirs('saved\\')
729+
except OSError:
730+
pass # Assumes only OSError will complain if saved/ already exists
731+
732+
# Create required files
733+
with open(WORD_FILE, 'w'):
734+
pass
735+
736+
with open(BAD_FILE, 'w'):
737+
pass
738+
712739
write_log('[INIT]: Successfully started spidy Web Crawler version {0}...'.format(VERSION))
713740
log('LOG: Successfully started crawler.')
714741

remove_duplicates.py

Lines changed: 0 additions & 34 deletions
This file was deleted.

0 commit comments

Comments
 (0)