From ff18ad3fcd05d3199b4745da7ce494ac4721e9fa Mon Sep 17 00:00:00 2001 From: eson Date: Mon, 23 Nov 2020 18:49:37 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=90=8E=E6=B2=A1=E5=93=8D?= =?UTF-8?q?=E5=BA=94.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- client.go | 49 ++++++++++++++++++++++++++++++------ client_test.go | 67 ++++++-------------------------------------------- example.js | 6 +++++ 3 files changed, 56 insertions(+), 66 deletions(-) create mode 100644 example.js diff --git a/client.go b/client.go index 6c86dde..10fe850 100644 --- a/client.go +++ b/client.go @@ -8,6 +8,7 @@ import ( "net" "net/http" "net/url" + "os" "sync" "github.com/gin-gonic/gin" @@ -38,7 +39,11 @@ type Client struct { // Label 区分不同任务类型 type Label struct { label string - cli *Client + + conditionJS string + conditionlock sync.Mutex + + cli *Client } // GetHash 根据label获取hash路径 @@ -59,7 +64,30 @@ func (l *Label) GetLabel(hash string) string { // Open 缓存了Label值. 每次调用少了label传参 func (l *Label) Open(urlstr string) (bodyRes string, ok bool) { - return l.cli.Open(l.label, urlstr) + return l.cli.open(l, urlstr) +} + +// SetContentCondition 设置识别到的内容条件. js代码. 必须是一个函数. 命名可以随意. 返回bool +func (l *Label) SetContentCondition(jsScript string) { + l.conditionlock.Lock() + defer l.conditionlock.Unlock() + l.conditionJS = jsScript +} + +// SetContentConditionFromFile 设置识别到的内容条件. js代码. 从js文件 必须是一个函数. 命名可以随意. 返回bool +func (l *Label) SetContentConditionFromFile(jsScriptFile string) { + f, err := os.Open(jsScriptFile) + if err != nil { + panic(err) + } + data, err := ioutil.ReadAll(f) + if err != nil { + panic(err) + } + l.conditionlock.Lock() + defer l.conditionlock.Unlock() + log.Println(string(data)) + l.conditionJS = string(data) } // GetPort Get return port string. default random. @@ -155,19 +183,26 @@ func (cli *Client) Disconnect() { } } -// Open 请求完url后 调用不同label注册的回调函数 -func (cli *Client) Open(label, urlstr string) (bodyRes string, ok bool) { +// open 请求完url后 调用不同label注册的回调函数. bodyRes 请求后服务器返回的基础信息. 如果不需要debug一般不需要使用. +func (cli *Client) open(label *Label, urlstr string) (bodyRes string, ok bool) { // urlstr = "https://playerduo.com/api/playerDuo-service-v2/rip113?lang=en&deviceType=browser" if cli.server == nil { panic("client is not connect. Client.Connect() ? ") } - if callback, ok := cli.register.Load(label); ok { + if callback, ok := cli.register.Load(label.label); ok { data := url.Values{} data["url"] = []string{urlstr} data["callback"] = []string{cli.host + ":" + cli.port + "/" + callback.(Callback).hash} - data["label"] = []string{label} + data["label"] = []string{label.label} + + func() { + label.conditionlock.Lock() + defer label.conditionlock.Unlock() + data["content_script"] = []string{label.conditionJS} + }() + resp, err := http.DefaultClient.PostForm(cli.chromeProxyAddr+"/task/put", data) if err != nil { panic(err) @@ -180,7 +215,7 @@ func (cli *Client) Open(label, urlstr string) (bodyRes string, ok bool) { return string(bodyRes), true } - log.Printf("label: %s is not exists", label) + log.Printf("label: %s is not exists", label.label) return "", false } diff --git a/client_test.go b/client_test.go index ab740b0..b863834 100644 --- a/client_test.go +++ b/client_test.go @@ -1,23 +1,19 @@ package cwclient import ( - "fmt" "log" "net/http" - "os" "testing" - "time" - - "github.com/tebeka/selenium" - "github.com/tebeka/selenium/chrome" ) func TestPort(t *testing.T) { cli := New("http://localhost:7123") + ltest := cli.Register("test", func(tid, content string) { log.Println(tid, content) }) + ltest.SetContentConditionFromFile("example.js") cli.Connect() log.Println(ltest.Open("https://playerduo.com/api/playerDuo-service-v2/rip113?lang=en&deviceType=browser")) log.Println(ltest.Open("https://playerduo.com/api/playerDuo-service-v2/yanngu?lang=en&deviceType=browser")) @@ -27,61 +23,14 @@ func TestPort(t *testing.T) { } func TestChrome(t *testing.T) { - port := 41331 - var err error - caps := selenium.Capabilities{"browserName": "chrome"} - - chromecaps := chrome.Capabilities{} - // for _, epath := range []string{"../../../crx/myblock.crx", "../../crx/myblock.crx", "./crx/myblock.crx"} { - // _, err := os.Stat(epath) - // if err == nil { - // err := chromecaps.AddExtension(epath) - // if err != nil { - // panic(err) - // } - // break - // } - // } - - chromecaps.Args = append(chromecaps.Args, "--disable-blink-features=AutomationControlled") - chromecaps.Args = append(chromecaps.Args, "user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36") - if proxy := os.Getenv("chrome_proxy"); proxy != "" { - log.Println("proxy-server", proxy) - chromecaps.Args = append(chromecaps.Args, "--proxy-server="+proxy) + { + defer func() { + log.Println(123) + }() + log.Println(456) } - if proxy := os.Getenv("pac_proxy"); proxy != "" { - log.Println("--proxy-pac-url=" + proxy) - chromecaps.Args = append(chromecaps.Args, "--proxy-pac-url="+proxy) - } + log.Println(321) - // chromecaps.Args = append(chromecaps.Args, "--proxy-pac-url=http://127.0.0.1:1081/pac") - chromecaps.Args = append(chromecaps.Args, "--disk-cache-dir=/tmp/chromedriver-cache") - chromecaps.Args = append(chromecaps.Args, "--user-data-dir=/tmp/chromedriver-userdata") - chromecaps.Args = append(chromecaps.Args, "--auto-open-devtools-for-tabs") - - chromecaps.Args = append(chromecaps.Args, "--disable-gpu", "--disable-images", "--start-maximized", "--disable-infobars") - // chromecaps.Args = append(chromecaps.Args, "--headless") - chromecaps.Args = append(chromecaps.Args, "--no-sandbox") - chromecaps.Args = append(chromecaps.Args, "--disable-dev-shm-usage", "--mute-audio", "--safebrowsing-disable-auto-update") - - chromecaps.ExcludeSwitches = append(chromecaps.ExcludeSwitches, "enable-automation") - caps.AddChrome(chromecaps) - - _, err = selenium.NewChromeDriverService("/usr/bin/chromedriver", port) - if err != nil { - panic(err) - } - - wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", 9222)) - if err != nil { - panic(err) - } - - err = wd.Get("https://www.amazon.com/") - if err != nil { - log.Println(err) - } - time.Sleep(time.Second * 10) } diff --git a/example.js b/example.js new file mode 100644 index 0000000..2677c00 --- /dev/null +++ b/example.js @@ -0,0 +1,6 @@ + +function condition() { + var href = window.location.href; + var content = document.documentElement.innerHTML; + return href.startsWith("https://playerduo.com") && content.startsWith('
')
+}
\ No newline at end of file