curl2info/parse_curl.go
2018-11-21 18:58:49 +08:00

255 lines
5.6 KiB
Go

package curl2info
import (
"errors"
"fmt"
"io/ioutil"
"net/http"
"net/http/cookiejar"
"net/url"
"os"
"regexp"
"strings"
"github.com/474420502/requests"
)
// CURL holds the pieces of an HTTP request extracted from a curl command line.
type CURL struct {
// ParsedURL is the request target; ParseRawCURL sets it from the quoted URL argument.
ParsedURL *url.URL
// Method is the HTTP verb; ParseRawCURL falls back to "GET" when no option set one.
Method string
// Header collects the request headers other than Cookie and Content-Type.
Header http.Header
// CookieJar is created by NewCURL and receives the cookies parsed from a Cookie header.
CookieJar http.CookieJar
// Cookies holds the cookies parsed from a Cookie header via ReadRawCookies.
Cookies []*http.Cookie
// Body is the request payload; the --data* options fill it and a Content-Type header sets its prefix.
Body *requests.Body
}
// NewCURL builds a CURL value.
//
// With no argument it returns an empty CURL whose header map, cookie jar and
// body are initialised. With exactly one argument it parses that string as a
// curl command line and panics if parsing fails. Passing more than one
// argument panics.
func NewCURL(scurl ...string) *CURL {
	switch len(scurl) {
	case 0:
		jar, _ := cookiejar.New(nil)
		return &CURL{
			Header:    make(http.Header),
			CookieJar: jar,
			Body:      requests.NewBody(),
		}
	case 1:
		parsed, err := ParseRawCURL(scurl[0])
		if err != nil {
			panic(err)
		}
		return parsed
	default:
		panic(errors.New("NewCURL only accept one curl info"))
	}
}
// String renders the method, URL, headers and cookies as a multi-line summary.
func (curl *CURL) String() string {
	const layout = "Method: %s\nParsedURL: %s\nHeader: %s\nCookie: %s"
	target := curl.ParsedURL.String()
	return fmt.Sprintf(layout, curl.Method, target, curl.Header, curl.Cookies)
}
// CreateSession returns a new requests.Session that carries this CURL's
// headers and has its cookies registered for the parsed URL.
func (curl *CURL) CreateSession() *requests.Session {
	session := requests.NewSession()

	headers, target, cookies := curl.Header, curl.ParsedURL, curl.Cookies
	session.SetHeader(headers)
	session.SetCookies(target, cookies)

	return session
}
// CreateWorkflow creates a Workflow on ses for this CURL's method and URL and
// attaches the parsed body to it.
//
// The method is matched case-insensitively against HEAD, GET, POST, PUT,
// PATCH, OPTIONS and DELETE. For any other method there is no matching
// session helper, so nil is returned instead of dereferencing a nil Workflow.
func (curl *CURL) CreateWorkflow(ses *requests.Session) *requests.Workflow {
	target := curl.ParsedURL.String()

	var wf *requests.Workflow
	switch strings.ToUpper(curl.Method) {
	case "HEAD":
		wf = ses.Head(target)
	case "GET":
		wf = ses.Get(target)
	case "POST":
		wf = ses.Post(target)
	case "PUT":
		wf = ses.Put(target)
	case "PATCH":
		wf = ses.Patch(target)
	case "OPTIONS":
		wf = ses.Options(target)
	case "DELETE":
		wf = ses.Delete(target)
	default:
		// Unsupported method: nothing to attach the body to.
		return nil
	}

	wf.SetBody(curl.Body)
	return wf
}
// curlArgPattern splits a curl command line into its arguments: long options
// with a quoted or bare value, short options with a quoted or bare value, a
// single-quoted bare argument (the URL), and long flags without a value.
var curlArgPattern = regexp.MustCompile(`--[^ ]+ +'[^']+'|--[^ ]+ +[^ ]+|-[A-Za-z] +'[^']+'|-[A-Za-z] +[^ ]+| '[^']+'|--[a-z]+ {0,}`)

// ParseRawCURL parses a curl command line (as copied from a shell or a
// browser's "copy as cURL") into a CURL value.
//
// The single-quoted bare argument is taken as the URL and every argument
// starting with '-' is handed to judgeAndParseOptions. Handlers that
// judgeAndParseOptions returns instead of running are executed after all
// arguments have been scanned, in the order they appeared, so they see the
// parsed URL even when it follows them on the command line. The method
// defaults to "GET" when no option set one.
//
// Any panic raised by an option handler is recovered and reported through
// err, in which case the returned CURL is nil.
func ParseRawCURL(scurl string) (cURL *CURL, err error) {
	defer func() {
		if r := recover(); r != nil {
			cURL = nil
			if e, ok := r.(error); ok {
				err = e
			} else {
				// A non-error panic value must not trigger a second panic here.
				err = fmt.Errorf("parse curl: %v", r)
			}
		}
	}()

	curl := NewCURL()

	// TrimPrefix removes the literal command name; TrimLeft would treat
	// "curl" as a set of characters to strip.
	scurl = strings.TrimPrefix(strings.TrimSpace(scurl), "curl")

	var deferred []*ParseFunction
	for _, m := range curlArgPattern.FindAllString(scurl, -1) {
		m = strings.TrimSpace(m)
		switch m[0] {
		case '\'':
			purl, perr := url.Parse(strings.Trim(m, "'"))
			if perr != nil {
				return nil, perr
			}
			curl.ParsedURL = purl
		case '-':
			if pf := judgeAndParseOptions(curl, m); pf != nil {
				deferred = append(deferred, pf)
			}
		}
	}

	// Run the handlers that were postponed during the scan.
	for _, pf := range deferred {
		pf.ExecuteFunction(pf.ParamCURL, pf.ParamData)
	}

	if curl.Method == "" {
		curl.Method = "GET"
	}
	return curl, nil
}
// ParseFunction bundles an option handler with the arguments it is to be
// called with. judgeAndParseOptions returns one for the -H and -X options
// rather than invoking the handler itself.
type ParseFunction struct {
// ExecuteFunction is the handler to call with (ParamCURL, ParamData).
ExecuteFunction func(u *CURL, soption string)
// ParamCURL is the CURL value the handler updates.
ParamCURL *CURL
// ParamData is the raw option text passed to the handler.
ParamData string
// Prioty is presumably an execution priority (spelling kept because the
// field is exported); no consumer is visible in this section — confirm
// the ordering semantics against the caller.
Prioty int
}
// judgeAndParseOptions dispatches a single curl option (soption is the option
// together with its value, e.g. "-H 'Accept: */*'") on its two-character
// prefix.
//
// -H and -X are not applied here: a ParseFunction describing the pending call
// is returned and the caller is responsible for executing it. Every other
// recognised option is applied to u immediately and nil is returned:
//
//	-A  sets the User-Agent header from the bare option value
//	-d  is forwarded as --data
//	-I  sets the method to HEAD
//	--  long options, handled by parseLongOption
//
// Unrecognised or too-short options are ignored and yield nil.
func judgeAndParseOptions(u *CURL, soption string) *ParseFunction {
	if len(soption) < 2 {
		return nil
	}

	switch soption[:2] {
	case "-H":
		return &ParseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseHeader, Prioty: 1}
	case "-X":
		return &ParseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseOptX, Prioty: 1}
	case "-A":
		// The value is a bare User-Agent string, not a 'Key: Value' pair,
		// so it cannot go through parseHeader.
		agent := strings.Trim(strings.TrimSpace(soption[2:]), "'")
		u.Header.Set("User-Agent", agent)
	case "-d":
		// -d is the short form of --data.
		parseLongOption(u, "--data"+soption[2:])
	case "-I":
		u.Method = "HEAD"
	case "--":
		parseLongOption(u, soption)
	}
	return nil
}
// parseOptX applies a "-X <method>" option: the text following -X, with any
// surrounding single quotes removed, becomes the request method.
func parseOptX(u *CURL, soption string) {
	pattern := regexp.MustCompile("-X +(.+)")
	groups := pattern.FindStringSubmatch(soption)
	u.Method = strings.Trim(groups[1], "'")
}
// dataOptionPattern matches the --data family of options. Group 1 is the
// variant suffix (empty for plain --data) and group 2 is the payload; (?s)
// lets the payload span several lines.
var dataOptionPattern = regexp.MustCompile(`(?s)^--data(?:-(binary|ascii|raw|urlencode))? +(.+)`)

// parseLongOption applies a long ("--name value") curl option to u.
//
// Supported options:
//
//	-d, --data <data>          HTTP POST data
//	    --data-ascii <data>    HTTP POST ASCII data
//	    --data-binary <data>   HTTP POST binary data
//	    --data-raw <data>      HTTP POST data, '@' allowed
//	    --data-urlencode <data> HTTP POST data url encoded
//	    --header <header>      same handling as -H
//
// A data option switches the method to POST only when no method has been set
// yet, so an explicitly chosen method is kept. Surrounding single quotes are
// removed from the payload. Any other long option is ignored.
func parseLongOption(u *CURL, soption string) {
	if groups := dataOptionPattern.FindStringSubmatch(soption); groups != nil {
		dtype := groups[1]
		data := strings.Trim(groups[2], "'")

		if u.Method == "" {
			u.Method = "POST"
		}

		switch dtype {
		case "binary":
			parseBodyBinary(u, data)
		case "raw":
			parseBodyRaw(u, data)
		case "urlencode":
			parseBodyURLEncode(u, data)
		default:
			// Plain --data (empty suffix) and --data-ascii share a handler.
			parseBodyASCII(u, data)
		}
		return
	}

	if strings.HasPrefix(soption, "--header") {
		parseHeader(u, soption)
	}
}
// parseBodyURLEncode handles --data-urlencode: the body is tagged with the
// URL-encoded content type and data is stored as given.
func parseBodyURLEncode(u *CURL, data string) {
	body := u.Body
	body.SetPrefix(requests.TypeURLENCODED)
	body.SetIOBody(data)
}
// parseBodyRaw handles --data-raw: the body is tagged with the URL-encoded
// content type and data is stored as given, with no special treatment of a
// leading '@'.
func parseBodyRaw(u *CURL, data string) {
	body := u.Body
	body.SetPrefix(requests.TypeURLENCODED)
	body.SetIOBody(data)
}
// parseBodyASCII handles --data and --data-ascii.
//
// The body is tagged with the URL-encoded content type. When data starts with
// '@' the remainder is a file name: the file is read and, as curl does for
// these options, carriage returns and newlines are removed before the content
// becomes the body. Otherwise data itself (which may be empty) is the body.
//
// It panics if the file cannot be opened or read; ParseRawCURL turns that
// panic into an error.
func parseBodyASCII(u *CURL, data string) {
	u.Body.SetPrefix(requests.TypeURLENCODED)

	// HasPrefix is safe on an empty payload, unlike indexing data[0].
	if !strings.HasPrefix(data, "@") {
		u.Body.SetIOBody(data)
		return
	}

	f, err := os.Open(data[1:])
	if err != nil {
		panic(err)
	}
	defer f.Close()

	bdata, err := ioutil.ReadAll(f)
	if err != nil {
		panic(err)
	}

	// Filter in place: drop every CR and LF byte.
	stripped := bdata[:0]
	for _, b := range bdata {
		if b != '\r' && b != '\n' {
			stripped = append(stripped, b)
		}
	}
	u.Body.SetIOBody(stripped)
}
// parseBodyBinary handles --data-binary.
//
// The body is tagged with the URL-encoded content type. When data starts with
// '@' the remainder is a file name and the file content becomes the body
// byte-for-byte: unlike --data, curl preserves carriage returns and newlines
// for this option. Otherwise data itself (which may be empty) is the body.
//
// It panics if the file cannot be opened or read; ParseRawCURL turns that
// panic into an error.
func parseBodyBinary(u *CURL, data string) {
	u.Body.SetPrefix(requests.TypeURLENCODED)

	// HasPrefix is safe on an empty payload, unlike indexing data[0].
	if !strings.HasPrefix(data, "@") {
		u.Body.SetIOBody(data)
		return
	}

	f, err := os.Open(data[1:])
	if err != nil {
		panic(err)
	}
	defer f.Close()

	bdata, err := ioutil.ReadAll(f)
	if err != nil {
		panic(err)
	}
	u.Body.SetIOBody(bdata)
}
// headerOptionPattern extracts the name and value from a single-quoted
// 'Name: value' header argument.
var headerOptionPattern = regexp.MustCompile(`'([^:]+): ([^']+)'`)

// parseHeader applies a header option (-H / --header followed by a quoted
// 'Name: value') to u. The header name is matched case-insensitively:
//
//   - Cookie: the value is parsed with ReadRawCookies into u.Cookies and, when
//     the URL is already known, registered in u.CookieJar for that URL.
//   - Content-Type: the value becomes the body's prefix.
//   - anything else is added to u.Header.
//
// It panics with a descriptive error when soption contains no quoted
// 'Name: value' pair; ParseRawCURL turns that panic into an error.
func parseHeader(u *CURL, soption string) {
	matches := headerOptionPattern.FindStringSubmatch(soption)
	if matches == nil {
		panic(fmt.Errorf("curl2info: cannot parse header option %q", soption))
	}
	key, value := matches[1], matches[2]

	switch http.CanonicalHeaderKey(key) {
	case "Cookie":
		u.Cookies = ReadRawCookies(value, "")
		// The jar needs a URL to scope the cookies; skip it while unknown.
		if u.ParsedURL != nil {
			u.CookieJar.SetCookies(u.ParsedURL, u.Cookies)
		}
	case "Content-Type":
		u.Body.SetPrefix(value)
	default:
		u.Header.Add(key, value)
	}
}