// CheckError panics with err when err is non-nil and does nothing otherwise.
func CheckError(err error) {
	if err == nil {
		return
	}
	panic(err)
}

// isTokenTable marks, by ASCII code, every byte accepted as part of a cookie
// name: the punctuation listed below plus digits and ASCII letters.
var isTokenTable = func() (table [127]bool) {
	const tokenChars = "!#$%&'*+-.^_`|~" +
		"0123456789" +
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
		"abcdefghijklmnopqrstuvwxyz"
	for i := 0; i < len(tokenChars); i++ {
		table[tokenChars[i]] = true
	}
	return table
}()

// isTokenRune reports whether r is listed in isTokenTable. Runes beyond the
// end of the table are never tokens.
func isTokenRune(r rune) bool {
	if idx := int(r); idx < len(isTokenTable) {
		return isTokenTable[idx]
	}
	return false
}

// ReadRawCookies parses the "name=value; name=value" pairs held in soptions
// and returns the successfully parsed cookies. Pairs with an invalid name or
// value are skipped.
//
// If filter isn't empty, only cookies of that name are returned. The result
// is never nil.
func ReadRawCookies(soptions string, filter string) []*http.Cookie {
	parsed := []*http.Cookie{}

	trimmed := strings.TrimSpace(soptions)
	if trimmed == "" {
		return parsed
	}

	for _, segment := range strings.Split(trimmed, ";") {
		pair := strings.TrimSpace(segment)
		if pair == "" {
			continue
		}

		// Everything before the first '=' is the name; a pair without '='
		// is a name with an empty value.
		name, rawValue := pair, ""
		if eq := strings.IndexByte(pair, '='); eq >= 0 {
			name, rawValue = pair[:eq], pair[eq+1:]
		}

		if !isCookieNameValid(name) || (filter != "" && name != filter) {
			continue
		}
		if value, ok := parseCookieValue(rawValue, true); ok {
			parsed = append(parsed, &http.Cookie{Name: name, Value: value})
		}
	}

	return parsed
}

// validCookieDomain returns whether v is a valid cookie domain-value: either
// a domain name accepted by isCookieDomainName or an IP address written
// without any ':'.
func validCookieDomain(v string) bool {
	return isCookieDomainName(v) ||
		(net.ParseIP(v) != nil && !strings.Contains(v, ":"))
}

// isCookieDomainName returns whether s is a valid domain name or a valid
// domain name with a leading dot '.'. It is almost a direct copy of
// package net's isDomainName.
func isCookieDomainName(s string) bool {
	if n := len(s); n == 0 || n > 255 {
		return false
	}

	// A cookie domain attribute may start with a single leading dot.
	s = strings.TrimPrefix(s, ".")

	var (
		prev      = byte('.')
		sawLetter bool // set once any letter has been seen
		labelLen  int
	)
	for i := 0; i < len(s); i++ {
		ch := s[i]
		isLetter := ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')
		isDigit := '0' <= ch && ch <= '9'

		if isLetter {
			// No '_' allowed here (in contrast to package net).
			sawLetter = true
			labelLen++
		} else if isDigit {
			labelLen++
		} else if ch == '-' {
			// A dash cannot directly follow a dot (or start the name).
			if prev == '.' {
				return false
			}
			labelLen++
		} else if ch == '.' {
			// A dot cannot follow a dot or a dash, and the label it closes
			// must hold between 1 and 63 bytes.
			if prev == '.' || prev == '-' || labelLen > 63 || labelLen == 0 {
				return false
			}
			labelLen = 0
		} else {
			return false
		}
		prev = ch
	}

	return sawLetter && prev != '-' && labelLen <= 63
}

// validCookieValueByte reports whether b may appear in a cookie value:
// bytes in the range 0x20..0x7e other than '"', ';' and '\'.
func validCookieValueByte(b byte) bool {
	switch b {
	case '"', ';', '\\':
		return false
	}
	return b >= 0x20 && b < 0x7f
}

// parseCookieValue validates raw as a cookie value. When allowDoubleQuote is
// set, one pair of enclosing double quotes is stripped first. It returns the
// resulting value and true, or "" and false if any byte is not allowed.
func parseCookieValue(raw string, allowDoubleQuote bool) (string, bool) {
	value := raw
	if allowDoubleQuote && len(value) > 1 &&
		strings.HasPrefix(value, `"`) && strings.HasSuffix(value, `"`) {
		value = value[1 : len(value)-1]
	}
	for _, b := range []byte(value) {
		if !validCookieValueByte(b) {
			return "", false
		}
	}
	return value, true
}

// isCookieNameValid reports whether raw is non-empty and made up of token
// runes only.
func isCookieNameValid(raw string) bool {
	return raw != "" && strings.IndexFunc(raw, isNotToken) == -1
}

// isNotToken is the negation of isTokenRune, in the shape strings.IndexFunc
// expects.
func isNotToken(r rune) bool {
	return !isTokenRune(r)
}
curl.ParsedURL.String(), curl.Header, curl.Cookies) +} + +// CreateSession 创建Session +func (curl *CURL) CreateSession() *requests.Session { + ses := requests.NewSession() + ses.SetHeader(curl.Header) + ses.SetCookies(curl.ParsedURL, curl.Cookies) + return ses +} + +// CreateWorkflow 根据Session 创建Workflow +func (curl *CURL) CreateWorkflow(ses *requests.Session) *requests.Workflow { + var wf *requests.Workflow + switch curl.Method { + case "HEAD": + wf = ses.Head(curl.ParsedURL.String()) + case "GET": + wf = ses.Get(curl.ParsedURL.String()) + case "POST": + wf = ses.Post(curl.ParsedURL.String()) + case "PUT": + wf = ses.Put(curl.ParsedURL.String()) + case "PATCH": + wf = ses.Patch(curl.ParsedURL.String()) + case "OPTIONS": + wf = ses.Options(curl.ParsedURL.String()) + case "DELETE": + wf = ses.Delete(curl.ParsedURL.String()) + } + + wf.SetBody(curl.Body) + return wf +} + +// ParseRawCURL curl_bash +func ParseRawCURL(scurl string) (cURL *CURL, err error) { + + defer func() { + if _err := recover(); _err != nil { + cURL = nil + err = _err.(error) + } + }() + + curl := NewCURL() + scurl = strings.TrimSpace(scurl) + scurl = strings.TrimLeft(scurl, "curl") + mathches := regexp.MustCompile(`--[^ ]+ +'[^']+'|--[^ ]+ +[^ ]+|-[A-Za-z] +'[^']+'|-[A-Za-z] +[^ ]+| '[^']+'|--[a-z]+ {0,}`).FindAllString(scurl, -1) + for _, m := range mathches { + m = strings.TrimSpace(m) + switch v := m[0]; v { + case '\'': + purl, err := url.Parse(strings.Trim(m, "'")) + CheckError(err) + curl.ParsedURL = purl + case '-': + judgeAndParseOptions(curl, m) + } + } + + if curl.Method == "" { + curl.Method = "GET" + } + + return curl, nil +} + +func judgeAndParseOptions(u *CURL, soption string) { + switch prefix := soption[0:2]; prefix { + case "-H": + parseHeader(u, soption) + case "--": + parseLongOption(u, soption) + case "-X": + matches := regexp.MustCompile("-X +(.+)").FindStringSubmatch(soption) + method := strings.Trim(matches[1], "'") + u.Method = method + } +} + +func parseLongOption(u *CURL, 
soption string) { + // -d, --data HTTP POST data + // --data-ascii HTTP POST ASCII data + // --data-binary HTTP POST binary data + // --data-raw HTTP POST data, '@' allowed + // --data-urlencode HTTP POST data url encoded + + switch { + case regexp.MustCompile("^--data |^--data-urlencode|^--data-binary|^--data-ascii|^--data-raw").MatchString(soption): + datas := regexp.MustCompile("^--data-(binary) +(.+)|^--data-(ascii) +(.+)|^--data-(raw) +(.+)|^--data-(urlencode) +(.+)|^--(data) +(.+)").FindStringSubmatch(soption) + + dtype := datas[1] + data := strings.Trim(datas[2], "'") + + if u.Method != "" { + u.Method = "POST" + } + + switch dtype { + case "binary": + parseBodyBinary(u, data) + case "ascii": + parseBodyASCII(u, data) + case "raw": + parseBodyRaw(u, data) + case "urlencode": + parseBodyURLEncode(u, data) + case "data": + parseBodyASCII(u, data) + } + + } + +} + +func parseBodyURLEncode(u *CURL, data string) { + u.Body.SetPrefix(requests.TypeURLENCODED) + u.Body.SetIOBody(data) +} + +func parseBodyRaw(u *CURL, data string) { + u.Body.SetPrefix(requests.TypeURLENCODED) + u.Body.SetIOBody(data) +} + +func parseBodyASCII(u *CURL, data string) { + u.Body.SetPrefix(requests.TypeURLENCODED) + + if data[0] != '@' { + u.Body.SetIOBody(data) + } else { + f, err := os.Open(data[1:]) + CheckError(err) + defer f.Close() + + bdata, err := ioutil.ReadAll(f) + CheckError(err) + u.Body.SetIOBody(bdata) + } +} + +// 处理@ 并且替/r/n符号 +func parseBodyBinary(u *CURL, data string) { + u.Body.SetPrefix(requests.TypeURLENCODED) + + if data[0] != '@' { + u.Body.SetIOBody(data) + } else { + f, err := os.Open(data[1:]) + CheckError(err) + defer f.Close() + bdata, err := ioutil.ReadAll(f) + CheckError(err) + bdata = regexp.MustCompile("\n|\r").ReplaceAll(bdata, []byte("")) + u.Body.SetIOBody(bdata) + } +} + +func parseHeader(u *CURL, soption string) { + + matches := regexp.MustCompile(`'([^:]+): ([^']+)'`).FindAllStringSubmatch(soption, 1)[0] + key := matches[1] + value := matches[2] + + 
switch key { + case "Cookie": + u.Cookies = ReadRawCookies(value, "") + u.CookieJar.SetCookies(u.ParsedURL, u.Cookies) + case "Content-Type": + u.Body.SetPrefix(value) + default: + u.Header.Add(key, value) + } + +} diff --git a/parse_curl_test.go b/parse_curl_test.go new file mode 100644 index 0000000..a60a600 --- /dev/null +++ b/parse_curl_test.go @@ -0,0 +1,48 @@ +package curl2info + +import ( + "log" + "testing" +) + +func init() { + log.SetFlags(log.Llongfile) +} + +func TestParseCURL(t *testing.T) { + + scurls := []string{ + `curl 'https://saluton.cizion.com/livere' -H 'Referer: http://www.yxdm.tv/resource/9135.html' -H 'Origin: http://www.yxdm.tv' -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36' -H 'Content-Type: application/json' --data-binary '{"type":"livere_pv","action":"loading","extra":{"useEagerLoading":false},"title":"哥布林杀手无删减版无暗牧无圣光 - 百度云网盘 - 全集动画下载 - 怡萱动漫","url":"http://www.yxdm.tv/resource/9135.html","consumer_seq":1020,"livere_seq":38141,"livere_referer":"www.yxdm.tv/resource/9135.html","sender":"tower","uuid":"e6213a42-41d0-4637-ad52-ccb48ba9cef1"}' --compressed`, + `curl 'https://saluton.cizion.com/livere' -X OPTIONS -H 'Access-Control-Request-Method: POST' -H 'Origin: http://www.yxdm.tv' -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36' -H 'Access-Control-Request-Headers: content-type' --compressed`, + `curl 'https://www.google-analytics.com/r/collect' --socks5 http:127.0.0.1:7070 -H 'Referer: https://stackoverflow.com/questions/42754307/how-to-unescape-quoted-octal-strings-in-golang' -H 'Origin: https://stackoverflow.com' -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36' -H 'Content-Type: text/plain;charset=UTF-8' --data-binary 
'v=1&_v=j72&a=1104564653&t=pageview&_s=1&dl=https%3A%2F%2Fstackoverflow.com%2Fquestions%2F42754307%2Fhow-to-unescape-quoted-octal-strings-in-golang&ul=en-us&de=UTF-8&dt=go%20-%20How%20to%20unescape%20quoted%20octal%20strings%20in%20Golang%3F%20-%20Stack%20Overflow&sd=24-bit&sr=1476x830&vp=1412x268&je=0&_u=QACAAEAB~&jid=1066047028&gjid=2019145233&cid=572307198.1525508485&tid=UA-108242619-1&_gid=1131483813.1542548817&_r=1&cd2=%7Cstring%7Cgo%7Cescaping%7C&cd3=Questions%2FShow&z=26020125' --compressed`, + `curl 'https://www.baidu.com/s?wd=ExpandEnv&rsv_spt=1&rsv_iqid=0xc222c428000016de&issp=1&f=8&rsv_bp=0&rsv_idx=2&ie=utf-8&tn=baiduhome_pg&rsv_enter=1&rsv_n=2&rsv_sug3=1&rsv_sug1=1&rsv_sug7=100&rsv_sug2=0&inputT=330&rsv_sug4=331' -H 'Connection: keep-alive' -H 'Cache-Control: max-age=0' -H 'Upgrade-Insecure-Requests: 1' -H 'User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8' -H 'Accept-Encoding: gzip, deflate, br' -H 'Accept-Language: zh' -H 'Cookie: BIDUPSID=88B7FC40D50C2F811E57590167144216; BAIDUID=D2066189021D32D6C36CAB19E9160526:FG=1; PSTM=1533032566; BDUSS=UNQT1ZkZW1NSzc0VmdacFowSktScWdPN2NTT3ZGTzdVMTBSaG9FMjFMSWQwNWhiQVFBQUFBJCQAAAAAAAAAAAEAAABgEGEMNDc0NDIwNTAyAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB1GcVsdRnFbT; BD_UPN=123353; MCITY=-257%3A; delPer=0; BD_CK_SAM=1; PSINO=6; BD_HOME=1; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; locale=zh; H_PS_PSSID=1452_21082_18559_27401_26350_22160; sugstore=1; H_PS_645EC=78b8xypsezNZBdukGC%2Fhg6hxwjU6OnG%2BEOSA7%2BRZkLnJydHkWLS0dtpQlG6NKpJ0L8NT; BDSVRTM=0' --compressed`, + `curl 'https://stats.g.doubleclick.net/j/collect?t=dc&aip=1&_r=3&v=1&_v=j72&tid=UA-108242619-1&cid=271874387.1533111004&jid=2011203704&gjid=1070480086&_gid=115399732.1542609235&_u=SACAAEAAEAAAAC~&z=480262738' -X POST -H 'Referer: 
https://stackoverflow.com/questions/28262376/parse-cookie-string-in-golang' -H 'Origin: https://stackoverflow.com' -H 'User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1' -H 'Content-Type: text/plain' --compressed`, + `curl 'https://www.google.com.hk/gen_204?s=webhp&t=aft&atyp=csi&ei=irnzW97xEIzqwQOTpYmABQ&rt=wsrt.818,aft.105,prt.105' -H 'origin: https://www.google.com.hk' -H 'accept-encoding: gzip, deflate, br' -H 'accept-language: zh' -H 'ping-from: https://www.google.com.hk/webhp?gws_rd=cr,ssl' -H 'cookie: 1P_JAR=2018-11-20-07; NID=146=Iqtbc8EtJC9VhWWZEOMkEzscxK670vybRRaLSgEKwtJPiCaC_lRabSbBv1KWr6S3-pZ1S-VrZL4Efbwby65hjCB6SClVV7Lt0wpilw3Hr7_Uc5pzkkZOGDhVSobcl95Hs7HhuU6vb097Llu1g23NAU7mDLUB3FopfIq6lY4FpJoNhsi6L9nAnGdlXZI' -H 'x-client-data: CI22yQEIpLbJAQipncoBCKijygEY+aXKAQ==' -H 'user-agent: Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1' -H 'content-type: text/ping' -H 'accept: */*' -H 'cache-control: max-age=0' -H 'authority: www.google.com.hk' -H 'ping-to: javascript:void(0);' --data-binary 'PING' --compressed`, + } + // Access-Control-Request-Method 方法告诉 --data-binary 默认是POST + + for _, scurl := range scurls { + curl, err := ParseRawCURL(scurl) + if err != nil { + t.Error(err, "\n", curl.String()) + } + if curl.Method == "" { + t.Error("curl.Method is nil") + } + + } +} + +func TestTouTiaoCURL(t *testing.T) { + scurl := "curl 
'http://is.snssdk.com/2/article/information/v24/?latitude=22.831367&longitude=113.511515&group_id=6565653745026204168&item_id=6565653745026204168&aggr_type=1&context=1&from_category=news_game&article_page=0&iid=34903754482&device_id=41148471494&ac=wifi&channel=oppo-cpa&aid=13&app_name=news_article&version_code=676&version_name=6.7.6&device_platform=android&ab_version=304489%2C261579%2C373245%2C360501%2C374617%2C366851%2C356335%2C345191%2C271178%2C357704%2C326524%2C326532%2C292723%2C366036%2C323233%2C371779%2C346557%2C351090%2C319958%2C372620%2C362184%2C214069%2C31643%2C333971%2C366873%2C374962%2C372618%2C280449%2C281298%2C366489%2C325619%2C373770%2C357402%2C361073%2C362402%2C290191%2C370014%2C353484%2C375739%2C373725%2C295827%2C353305%2C375426%2C374426%2C239095%2C360541%2C344347%2C170988%2C371590%2C368831%2C368827%2C368775%2C374117%2C365053%2C374232%2C368303%2C375692%2C330632%2C297059%2C374250%2C276206%2C286212%2C350193%2C365036%2C373741%2C374405%2C373368%2C370846%2C364453%2C375713%2C369501%2C369165%2C368839%2C375433%2C373123%2C371555%2C371963%2C374142%2C372907&ab_client=a1%2Cc4%2Ce1%2Cf1%2Cg2%2Cf7&ab_group=94567%2C102754%2C181430&ab_feature=94567%2C102754&abflag=3&ssmix=a&device_type=ONEPLUS+A3010&device_brand=OnePlus&language=zh&os_api=26&os_version=8.0.0&uuid=864854034514328&openudid=9b35a4035eecee2c&manifest_version_code=676&resolution=1080*1920&dpi=420&update_version_code=67610&_rticket=1528706910264&plugin=10603&pos=5r_-9Onkv6e_eCQieCoDeCUfv7G_8fLz-vTp6Pn4v6esrK6zqKysqKyosb_x_On06ej5-L-nr6-zpa6srquqsb_88Pzt3vTp5L-nv3gkIngqA3glH7-xv_zw_O3R8vP69Ono-fi_p6ysrrOupauqqaSxv_zw_O3R_On06ej5-L-nr66zrairpKqv4A%3D%3D&fp=HrT_FlD_PMcIFlD5FSU1FYmeFrxO&rom_version=26&ts=1528706911&as=a265e371dff53b57de5999&mas=0073e8ef3f9a8b842da0ead7d35c0597ea2ee0ccce5e5d5db5' -H 'Accept-Encoding: gzip' -H 'X-SS-REQ-TICKET: 1528706910267' -H 'User-Agent: Dalvik/2.1.0 (Linux; U; Android 8.0.0; ONEPLUS A3010 Build/OPR1.170623.032) NewsArticle/6.7.6 okhttp/3.10.0.1' -H 'Cookie: 
odin_tt=210899a257b5fe787a3465e2220fb94d91d5ad34c77dee3560f93fccc82dd738cccb301770f633530fdd6ceea955983d; UM_distinctid=163ace3b0050-08fccf530af621-f1c0e26-49a10-163ace3b0093e8; CNZZDATA1271720685=1435124261-1527612007-%7C1527612007; CNZZDATA1264530760=119491224-1527609979-%7C1527612115; JSESSIONID=67814B7DDE08D5A9F3B3D684220CF3FB; alert_coverage=6; qh[360]=1; install_id=34903754482; ttreq=1$b7221ef01bd5ed7c030f5db45e959686c9ddd0d2' -H 'Host: is.snssdk.com' -H 'Connection: Keep-Alive'" + curl, err := ParseRawCURL(scurl) + CheckError(err) + + ses := curl.CreateSession() + wf := curl.CreateWorkflow(ses) + resp, err := wf.Execute() + if err != nil { + t.Error(err) + } + t.Log(curl, resp.Content()) +}