Commit e46c4f3

Add serve command (#2040)
* Add serve command to run the HTTP server command, very similar to what is used in Replicate production. By making this command more within reach, my hope is that more Cog users will build familiarity with and opinions about the HTTP server interface. Connected to PLAT-259
* We must serve the Great Alphabet 🙇
  Co-authored-by: F <[email protected]>
  Signed-off-by: Dan Buch <[email protected]>
* Add a note about the `serve` command to README
* Write the forwarded localhost server address in serve output

---------

Signed-off-by: Dan Buch <[email protected]>
Co-authored-by: F <[email protected]>
1 parent 966af51 commit e46c4f3

4 files changed, 110 insertions(+), 9 deletions(-)

README.md

Lines changed: 10 additions & 0 deletions
@@ -80,6 +80,16 @@ $ curl http://localhost:5000/predictions -X POST \
     -d '{"input": {"image": "https://.../input.jpg"}}'
 ```
 
+Or, combine build and run via the `serve` command:
+
+```console
+$ cog serve -p 8080
+
+$ curl http://localhost:8080/predictions -X POST \
+    -H 'Content-Type: application/json' \
+    -d '{"input": {"image": "https://.../input.jpg"}}'
+```
+
 <!-- NOTE (bfirsh): Development environment instructions intentionally left out of readme for now, so as not to confuse the "ship a model to production" message.
 
 In development, you can also run arbitrary commands inside the Docker environment:
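The README addition drives the endpoint with curl, but the same request can be made from any HTTP client. Below is a minimal sketch in Python, assuming a server started with `cog serve -p 8080` is listening locally and that the model accepts an `image` input as in the README example (the image URL is the README's placeholder, not a real file):

```python
import json
import urllib.request

# Sketch of a client for the server started by `cog serve -p 8080`.
# The payload mirrors the README example; adjust "input" to match the
# inputs your model actually declares in predict.py.
payload = json.dumps({"input": {"image": "https://.../input.jpg"}}).encode("utf-8")

req = urllib.request.Request(
    "http://127.0.0.1:8080/predictions",
    data=payload,
    headers={"Content-Type": "application/json"},
    method="POST",
)

with urllib.request.urlopen(req) as resp:
    print(json.load(resp))  # prediction response as parsed JSON
```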

pkg/cli/root.go

Lines changed: 1 addition & 0 deletions
@@ -45,6 +45,7 @@ https://github.com/replicate/cog`,
 		newPredictCommand(),
 		newPushCommand(),
 		newRunCommand(),
+		newServeCommand(),
 		newTrainCommand(),
 	)
 
pkg/cli/serve.go

Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@
+package cli
+
+import (
+	"runtime"
+	"strings"
+
+	"github.com/replicate/cog/pkg/config"
+	"github.com/replicate/cog/pkg/docker"
+	"github.com/replicate/cog/pkg/image"
+	"github.com/replicate/cog/pkg/util"
+	"github.com/replicate/cog/pkg/util/console"
+	"github.com/spf13/cobra"
+)
+
+var (
+	port = 8393
+)
+
+func newServeCommand() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "serve",
+		Short: "Run a prediction HTTP server",
+		Long: `Run a prediction HTTP server.
+
+Generate and run an HTTP server based on the declared model inputs and outputs.`,
+		RunE:       cmdServe,
+		Args:       cobra.MaximumNArgs(0),
+		SuggestFor: []string{"http"},
+	}
+
+	addBuildProgressOutputFlag(cmd)
+	addUseCudaBaseImageFlag(cmd)
+	addUseCogBaseImageFlag(cmd)
+	addGpusFlag(cmd)
+
+	cmd.Flags().IntVarP(&port, "port", "p", port, "Port on which to listen")
+
+	return cmd
+}
+
+func cmdServe(cmd *cobra.Command, arg []string) error {
+	cfg, projectDir, err := config.GetConfig(projectDirFlag)
+	if err != nil {
+		return err
+	}
+
+	imageName, err := image.BuildBase(cfg, projectDir, buildUseCudaBaseImage, DetermineUseCogBaseImage(cmd), buildProgressOutput)
+	if err != nil {
+		return err
+	}
+
+	gpus := ""
+	if gpusFlag != "" {
+		gpus = gpusFlag
+	} else if cfg.Build.GPU {
+		gpus = "all"
+	}
+
+	args := []string{
+		"python",
+		"--check-hash-based-pycs", "never",
+		"-m", "cog.server.http",
+		"--await-explicit-shutdown", "true",
+	}
+
+	runOptions := docker.RunOptions{
+		Args:    args,
+		Env:     envFlags,
+		GPUs:    gpus,
+		Image:   imageName,
+		Volumes: []docker.Volume{{Source: projectDir, Destination: "/src"}},
+		Workdir: "/src",
+	}
+
+	if util.IsAppleSiliconMac(runtime.GOOS, runtime.GOARCH) {
+		runOptions.Platform = "linux/amd64"
+	}
+
+	runOptions.Ports = append(runOptions.Ports, docker.Port{HostPort: port, ContainerPort: 5000})
+
+	console.Info("")
+	console.Infof("Running '%[1]s' in Docker with the current directory mounted as a volume...", strings.Join(args, " "))
+	console.Info("")
+	console.Infof("Serving at http://127.0.0.1:%[1]v", port)
+	console.Info("")
+
+	err = docker.Run(runOptions)
+	// Only retry if we're using a GPU but the user didn't explicitly select one with --gpus.
+	// If the user specified the wrong GPU, they are explicitly selecting a GPU and they'll want to hear about it.
+	if runOptions.GPUs == "all" && err == docker.ErrMissingDeviceDriver {
+		console.Info("Missing device driver, re-trying without GPU")
+
+		runOptions.GPUs = ""
+		err = docker.Run(runOptions)
+	}
+
+	return err
+}
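For readers less familiar with `docker.RunOptions`, the options assembled in `cmdServe` amount to roughly the `docker run` invocation sketched below. This is an approximation rather than the exact command Cog issues: the image name and project path are placeholders, environment variables from `-e` flags are omitted, and `--gpus`/`--platform` are only added under the conditions shown in the Go code above.

```python
import subprocess

# Rough equivalent of the container run behind `cog serve -p 8080` (a sketch;
# image name and project path are placeholders).
subprocess.run(
    [
        "docker", "run",
        "-p", "8080:5000",                # HostPort -> ContainerPort 5000
        "-v", "/path/to/project:/src",    # project dir mounted at /src
        "-w", "/src",                     # Workdir
        # "--gpus", "all",                # only if cog.yaml requests a GPU
        # "--platform", "linux/amd64",    # only on Apple Silicon hosts
        "cog-example-base",               # placeholder for the image from image.BuildBase
        "python", "--check-hash-based-pycs", "never",
        "-m", "cog.server.http",
        "--await-explicit-shutdown", "true",
    ],
    check=True,
)
```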

test-integration/test_integration/util.py

Lines changed: 1 addition & 9 deletions
@@ -137,17 +137,9 @@ def cog_server_http_run(project_dir: str):
     server = subprocess.Popen(
         [
             "cog",
-            "run",
-            "-e",
-            f"PORT={port}",
+            "serve",
             "-p",
             str(port),
-            "python",
-            "--check-hash-based-pycs",
-            "never",
-            "-m",
-            "cog.server.http",
-            "--await-explicit-shutdown=true",
         ],
         cwd=project_dir,
         # NOTE: inheriting stdout and stderr from the parent process when running
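Because the server process is launched in the background, a test (or any script) still has to wait for the HTTP server inside the container to come up before sending predictions. A polling loop along these lines would do it; the `/health-check` path is an assumption about the cog HTTP server's interface, and polling any route you know exists works just as well.

```python
import time
import urllib.error
import urllib.request


def wait_until_ready(base_url: str, timeout: float = 120.0) -> None:
    """Poll the server started by `cog serve` until it answers HTTP requests.

    The /health-check path is an assumption; substitute a route your
    server version is known to expose if it differs.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(f"{base_url}/health-check", timeout=1) as resp:
                if resp.status == 200:
                    return
        except (urllib.error.URLError, OSError):
            pass  # server not accepting connections yet
        time.sleep(0.5)
    raise TimeoutError(f"server at {base_url} did not become ready in {timeout}s")
```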
