|
41 | 41 | from lxml import etree
|
42 | 42 | import onnxruntime
|
43 | 43 | onnxruntime.set_default_logger_severity(3) # 隐藏onnxruntime的日志
|
44 |
| -# import undetected_chromedriver as uc |
| 44 | +import undetected_chromedriver as uc |
45 | 45 | # import pandas as pd
|
46 | 46 | # import numpy
|
47 | 47 | # import pytesseract
|
@@ -116,7 +116,7 @@ def __init__(self, browser_t, id, service, version, event, saveName, config):
|
116 | 116 | self.browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
|
117 | 117 | 'source': js}) # TMALL 反扒
|
118 | 118 | WebDriverWait(self.browser, 10)
|
119 |
| - self.browser.get('about:blank') |
| 119 | + # self.browser.get('about:blank') |
120 | 120 | self.procedure = service["graph"] # 程序执行流程
|
121 | 121 | try:
|
122 | 122 | self.maxViewLength = service["maxViewLength"] # 最大显示长度
|
@@ -729,7 +729,8 @@ def executeNode(self, nodeId, loopValue="", loopPath="", index=0):
|
729 | 729 | for i in node["sequence"]: # 从根节点开始向下读取
|
730 | 730 | self.executeNode(i, loopValue, loopPath, index)
|
731 | 731 | elif node["option"] == 1: # 打开网页操作
|
732 |
| - self.openPage(node["parameters"], loopValue) |
| 732 | + if not (nodeId == 1 and self.service["cloudflare"] == 1): |
| 733 | + self.openPage(node["parameters"], loopValue) |
733 | 734 | elif node["option"] == 2: # 点击元素
|
734 | 735 | self.clickElement(node["parameters"], loopValue, loopPath, index)
|
735 | 736 | elif node["option"] == 3: # 提取数据
|
@@ -1938,9 +1939,14 @@ def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0):
|
1938 | 1939 | elif cloudflare == 1:
|
1939 | 1940 | if sys.platform == "win32":
|
1940 | 1941 | options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器
|
| 1942 | + # options.add_argument("--auto-open-devtools-for-tabs") |
1941 | 1943 | # options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" # 需要用自己的浏览器
|
1942 |
| - browser_t = MyUCChrome( |
1943 |
| - options=options, driver_executable_path=driver_path) |
| 1944 | + browser_t = MyUCChrome(options=options, driver_executable_path=driver_path) |
| 1945 | + links = list(filter(isnotnull, service["links"].split("\n"))) |
| 1946 | + browser_t.execute_script('window.open("'+ links[0] +'","_blank");') # open page in new tab |
| 1947 | + time.sleep(5) # wait until page has loaded |
| 1948 | + browser_t.switch_to.window(browser_t.window_handles[1]) # switch to new tab |
| 1949 | + # browser_t = uc.Chrome() |
1944 | 1950 | else:
|
1945 | 1951 | print("Cloudflare模式只支持Windows x64平台。")
|
1946 | 1952 | print(
|
|
0 commit comments