Commit e46c4f3

Add serve command (#2040)
* Add serve command to run the HTTP server command, very similar to what is used in Replicate production. By making this command more within reach, my hope is that more Cog users will build familiarity with and opinions about the HTTP server interface. Connected to PLAT-259
* We must serve the Great Alphabet 🙇
  Co-authored-by: F <[email protected]>
  Signed-off-by: Dan Buch <[email protected]>
* Add a note about the `serve` command to README
* Write the forwarded localhost server address in serve output

---------

Signed-off-by: Dan Buch <[email protected]>
Co-authored-by: F <[email protected]>
1 parent 966af51 commit e46c4f3

4 files changed, 110 insertions(+), 9 deletions(-)

README.md

Lines changed: 10 additions & 0 deletions
@@ -80,6 +80,16 @@ $ curl http://localhost:5000/predictions -X POST \
     -d '{"input": {"image": "https://.../input.jpg"}}'
 ```
 
+Or, combine build and run via the `serve` command:
+
+```console
+$ cog serve -p 8080
+
+$ curl http://localhost:8080/predictions -X POST \
+    -H 'Content-Type: application/json' \
+    -d '{"input": {"image": "https://.../input.jpg"}}'
+```
+
 <!-- NOTE (bfirsh): Development environment instructions intentionally left out of readme for now, so as not to confuse the "ship a model to production" message.
 
 In development, you can also run arbitrary commands inside the Docker environment:
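The README addition drives the endpoint with curl, but the same request can be made from any HTTP client. Below is a minimal sketch in Python, assuming a server started with `cog serve -p 8080` is listening locally and that the model accepts an `image` input as in the README example (the image URL is the README's placeholder, not a real file):

```python
import json
import urllib.request

# Sketch of a client for the server started by `cog serve -p 8080`.
# The payload mirrors the README example; adjust "input" to match the
# inputs your model actually declares in predict.py.
payload = json.dumps({"input": {"image": "https://.../input.jpg"}}).encode("utf-8")

req = urllib.request.Request(
    "http://127.0.0.1:8080/predictions",
    data=payload,
    headers={"Content-Type": "application/json"},
    method="POST",
)

with urllib.request.urlopen(req) as resp:
    print(json.load(resp))  # prediction response as parsed JSON
```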

pkg/cli/root.go

Lines changed: 1 addition & 0 deletions
@@ -45,6 +45,7 @@ https://github.com/replicate/cog`,
 		newPredictCommand(),
 		newPushCommand(),
 		newRunCommand(),
+		newServeCommand(),
 		newTrainCommand(),
 	)
 
pkg/cli/serve.go

Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@
+package cli
+
+import (
+	"runtime"
+	"strings"
+
+	"github.com/replicate/cog/pkg/config"
+	"github.com/replicate/cog/pkg/docker"
+	"github.com/replicate/cog/pkg/image"
+	"github.com/replicate/cog/pkg/util"
+	"github.com/replicate/cog/pkg/util/console"
+	"github.com/spf13/cobra"
+)
+
+var (
+	port = 8393
+)
+
+func newServeCommand() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "serve",
+		Short: "Run a prediction HTTP server",
+		Long: `Run a prediction HTTP server.
+
+Generate and run an HTTP server based on the declared model inputs and outputs.`,
+		RunE:       cmdServe,
+		Args:       cobra.MaximumNArgs(0),
+		SuggestFor: []string{"http"},
+	}
+
+	addBuildProgressOutputFlag(cmd)
+	addUseCudaBaseImageFlag(cmd)
+	addUseCogBaseImageFlag(cmd)
+	addGpusFlag(cmd)
+
+	cmd.Flags().IntVarP(&port, "port", "p", port, "Port on which to listen")
+
+	return cmd
+}
+
+func cmdServe(cmd *cobra.Command, arg []string) error {
+	cfg, projectDir, err := config.GetConfig(projectDirFlag)
+	if err != nil {
+		return err
+	}
+
+	imageName, err := image.BuildBase(cfg, projectDir, buildUseCudaBaseImage, DetermineUseCogBaseImage(cmd), buildProgressOutput)
+	if err != nil {
+		return err
+	}
+
+	gpus := ""
+	if gpusFlag != "" {
+		gpus = gpusFlag
+	} else if cfg.Build.GPU {
+		gpus = "all"
+	}
+
+	args := []string{
+		"python",
+		"--check-hash-based-pycs", "never",
+		"-m", "cog.server.http",
+		"--await-explicit-shutdown", "true",
+	}
+
+	runOptions := docker.RunOptions{
+		Args:    args,
+		Env:     envFlags,
+		GPUs:    gpus,
+		Image:   imageName,
+		Volumes: []docker.Volume{{Source: projectDir, Destination: "/src"}},
+		Workdir: "/src",
+	}
+
+	if util.IsAppleSiliconMac(runtime.GOOS, runtime.GOARCH) {
+		runOptions.Platform = "linux/amd64"
+	}
+
+	runOptions.Ports = append(runOptions.Ports, docker.Port{HostPort: port, ContainerPort: 5000})
+
+	console.Info("")
+	console.Infof("Running '%[1]s' in Docker with the current directory mounted as a volume...", strings.Join(args, " "))
+	console.Info("")
+	console.Infof("Serving at http://127.0.0.1:%[1]v", port)
+	console.Info("")
+
+	err = docker.Run(runOptions)
+	// Only retry if we're using a GPU but the user didn't explicitly select one with --gpus.
+	// If the user specified the wrong GPU, they are explicitly selecting a GPU and they'll want to hear about it.
+	if runOptions.GPUs == "all" && err == docker.ErrMissingDeviceDriver {
+		console.Info("Missing device driver, re-trying without GPU")
+
+		runOptions.GPUs = ""
+		err = docker.Run(runOptions)
+	}
+
+	return err
+}
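For readers less familiar with `docker.RunOptions`, the options assembled in `cmdServe` amount to roughly the `docker run` invocation sketched below. This is an approximation rather than the exact command Cog issues: the image name and project path are placeholders, environment variables from `-e` flags are omitted, and `--gpus`/`--platform` are only added under the conditions shown in the Go code above.

```python
import subprocess

# Rough equivalent of the container run behind `cog serve -p 8080` (a sketch;
# image name and project path are placeholders).
subprocess.run(
    [
        "docker", "run",
        "-p", "8080:5000",                # HostPort -> ContainerPort 5000
        "-v", "/path/to/project:/src",    # project dir mounted at /src
        "-w", "/src",                     # Workdir
        # "--gpus", "all",                # only if cog.yaml requests a GPU
        # "--platform", "linux/amd64",    # only on Apple Silicon hosts
        "cog-example-base",               # placeholder for the image from image.BuildBase
        "python", "--check-hash-based-pycs", "never",
        "-m", "cog.server.http",
        "--await-explicit-shutdown", "true",
    ],
    check=True,
)
```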

test-integration/test_integration/util.py

Lines changed: 1 addition & 9 deletions
@@ -137,17 +137,9 @@ def cog_server_http_run(project_dir: str):
     server = subprocess.Popen(
         [
             "cog",
-            "run",
-            "-e",
-            f"PORT={port}",
+            "serve",
             "-p",
             str(port),
-            "python",
-            "--check-hash-based-pycs",
-            "never",
-            "-m",
-            "cog.server.http",
-            "--await-explicit-shutdown=true",
         ],
         cwd=project_dir,
         # NOTE: inheriting stdout and stderr from the parent process when running
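Because the server process is launched in the background, a test (or any script) still has to wait for the HTTP server inside the container to come up before sending predictions. A polling loop along these lines would do it; the `/health-check` path is an assumption about the cog HTTP server's interface, and polling any route you know exists works just as well.

```python
import time
import urllib.error
import urllib.request


def wait_until_ready(base_url: str, timeout: float = 120.0) -> None:
    """Poll the server started by `cog serve` until it answers HTTP requests.

    The /health-check path is an assumption; substitute a route your
    server version is known to expose if it differs.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(f"{base_url}/health-check", timeout=1) as resp:
                if resp.status == 200:
                    return
        except (urllib.error.URLError, OSError):
            pass  # server not accepting connections yet
        time.sleep(0.5)
    raise TimeoutError(f"server at {base_url} did not become ready in {timeout}s")
```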
