Skip to content

Commit f567e4e

Browse files
author
naibo
committed
Update cloudflare
1 parent 145c766 commit f567e4e

File tree

2 files changed

+12
-6
lines changed

2 files changed

+12
-6
lines changed

ExecuteStage/.vscode/launch.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
12 12
"justMyCode": false,
13 13
// "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
14 14
// "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
15-
"args": ["--id", "[85]", "--headless", "0", "--user_data", "1", "--keyboard", "0"]
15+
"args": ["--id", "[25]", "--headless", "0", "--user_data", "0", "--keyboard", "0"]
16 16
}
17 17
]
18 18
}

ExecuteStage/easyspider_executestage.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
41 41
from lxml import etree
42 42
import onnxruntime
43 43
onnxruntime.set_default_logger_severity(3) # 隐藏onnxruntime的日志
44-
# import undetected_chromedriver as uc
44+
import undetected_chromedriver as uc
45 45
# import pandas as pd
46 46
# import numpy
47 47
# import pytesseract
@@ -116,7 +116,7 @@ def __init__(self, browser_t, id, service, version, event, saveName, config):
116 116
self.browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
117 117
'source': js}) # TMALL 反扒
118 118
WebDriverWait(self.browser, 10)
119-
self.browser.get('about:blank')
119+
# self.browser.get('about:blank')
120 120
self.procedure = service["graph"] # 程序执行流程
121 121
try:
122 122
self.maxViewLength = service["maxViewLength"] # 最大显示长度
@@ -729,7 +729,8 @@ def executeNode(self, nodeId, loopValue="", loopPath="", index=0):
729 729
for i in node["sequence"]: # 从根节点开始向下读取
730 730
self.executeNode(i, loopValue, loopPath, index)
731 731
elif node["option"] == 1: # 打开网页操作
732-
self.openPage(node["parameters"], loopValue)
732+
if not (nodeId == 1 and self.service["cloudflare"] == 1):
733+
self.openPage(node["parameters"], loopValue)
733 734
elif node["option"] == 2: # 点击元素
734 735
self.clickElement(node["parameters"], loopValue, loopPath, index)
735 736
elif node["option"] == 3: # 提取数据
@@ -1938,9 +1939,14 @@ def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0):
1938 1939
elif cloudflare == 1:
1939 1940
if sys.platform == "win32":
1940 1941
options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器
1942+
# options.add_argument("--auto-open-devtools-for-tabs")
1941 1943
# options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" # 需要用自己的浏览器
1942-
browser_t = MyUCChrome(
1943-
options=options, driver_executable_path=driver_path)
1944+
browser_t = MyUCChrome(options=options, driver_executable_path=driver_path)
1945+
links = list(filter(isnotnull, service["links"].split("\n")))
1946+
browser_t.execute_script('window.open("'+ links[0] +'","_blank");') # open page in new tab
1947+
time.sleep(5) # wait until page has loaded
1948+
browser_t.switch_to.window(browser_t.window_handles[1]) # switch to new tab
1949+
# browser_t = uc.Chrome()
1944 1950
else:
1945 1951
print("Cloudflare模式只支持Windows x64平台。")
1946 1952
print(

0 commit comments

Comments
 (0)