If you want to send videos or images to the Google Gemini API, e.g. for video summarization, you need to upload the file to a separate endpoint and wait for it to be processed.
This is described in the Gemini docs, and it's fairly straightforward to translate into Elixir, but in case you just want to grab the code (and save a few minutes of prompting an LLM :P), here is what I used:
defmodule PodClipper.Gemini do
  @moduledoc """
  Small client for uploading media to the Google Gemini File API.

  The flow is:

    1. `upload_video/1` starts a resumable upload, sends the file bytes and
       returns the URI of the created file resource.
    2. `wait_for_processing/2` polls that URI until the file leaves the
       `"PROCESSING"` state.

  The API key is read from the `GEMINI_API_KEY` environment variable each
  time a request is made.
  """

  require Logger

  @base_url "https://generativelanguage.googleapis.com"

  @doc """
  Uploads the file at `video_path` using the resumable upload protocol.

  Returns `{:ok, file_uri}` when the upload response contains a file URI,
  or `{:error, message}` when either the upload URL or the file URI is
  missing from the responses.

  Raises if the file cannot be stat'ed or read, if the `file` command exits
  with a non-zero status, or if `Req.post!/2` raises.
  """
  @spec upload_video(Path.t()) :: {:ok, String.t()} | {:error, String.t()}
  def upload_video(video_path) do
    mime_type = get_mime_type(video_path)
    num_bytes = File.stat!(video_path).size
    display_name = Path.basename(video_path)
    api_key = get_api_key()

    # Initial resumable request: announces size and type of the upcoming
    # upload; the upload URL comes back in a response header.
    start_response =
      Req.post!(
        "#{@base_url}/upload/v1beta/files",
        params: [key: api_key],
        headers: [
          {"x-goog-upload-protocol", "resumable"},
          {"x-goog-upload-command", "start"},
          {"x-goog-upload-header-content-length", "#{num_bytes}"},
          {"x-goog-upload-header-content-type", mime_type},
          {"content-type", "application/json"}
        ],
        json: %{
          file: %{
            display_name: display_name
          }
        }
      )

    # A failed `get_upload_url/1` result falls through `with` unchanged.
    with {:ok, upload_url} <- get_upload_url(start_response) do
      # Upload the actual bytes in a single chunk and finalize immediately.
      # NOTE: the whole file is read into memory here.
      upload_response =
        Req.post!(upload_url,
          headers: [
            {"content-length", "#{num_bytes}"},
            {"x-goog-upload-offset", "0"},
            {"x-goog-upload-command", "upload, finalize"}
          ],
          body: File.read!(video_path)
        )

      case upload_response.body do
        %{"file" => %{"uri" => file_uri}} -> {:ok, file_uri}
        _ -> {:error, "Failed to get file URI from response"}
      end
    end
  end

  @doc """
  Polls `file_uri` until the file is no longer in the `"PROCESSING"` state.

  Sleeps `poll_interval_ms` milliseconds (default `1000`) between polls.
  There is no upper bound on the number of polls: the function keeps
  polling for as long as the state stays `"PROCESSING"`.

  Returns `{:ok, file_uri}` once the state is `"ACTIVE"`, or
  `{:error, message}` for any other state.
  """
  @spec wait_for_processing(String.t(), non_neg_integer()) ::
          {:ok, String.t()} | {:error, String.t()}
  def wait_for_processing(file_uri, poll_interval_ms \\ 1000) do
    case get_file_state(file_uri) do
      "PROCESSING" ->
        Logger.info("Processing video...")
        Process.sleep(poll_interval_ms)
        # Pass the interval along; otherwise every poll after the first
        # would silently fall back to the default.
        wait_for_processing(file_uri, poll_interval_ms)

      "ACTIVE" ->
        {:ok, file_uri}

      state ->
        {:error, "Unexpected file state: #{state}"}
    end
  end

  # Fetches the file resource and returns its "state" field (nil if absent).
  defp get_file_state(file_uri) do
    response = Req.get!(file_uri, params: [key: get_api_key()])
    Map.get(response.body, "state")
  end

  # Extracts the resumable upload URL from the "x-goog-upload-url" header.
  defp get_upload_url(%Req.Response{} = response) do
    case Req.Response.get_header(response, "x-goog-upload-url") do
      [url | _] -> {:ok, url}
      e -> {:error, "Failed to get upload URL from response. #{inspect(e)}"}
    end
  end

  # Detects the MIME type by shelling out to the `file` command; the match
  # on exit status 0 makes a failing command raise.
  defp get_mime_type(file_path) do
    {mime_type, 0} = System.cmd("file", ["-b", "--mime-type", file_path])
    String.trim(mime_type)
  end

  # Reads the API key from the environment (nil when the variable is unset).
  defp get_api_key do
    System.get_env("GEMINI_API_KEY")
  end
end
Usage
# Upload a local video, then wait until Gemini has finished processing it.
video_path = "/Users/samrat/Downloads/348f603d-3a7b-4975-bf98-76fe7df03be9.mp4"
{:ok, uri} = PodClipper.Gemini.upload_video(video_path)
PodClipper.Gemini.wait_for_processing(uri)
Once I upload the image/video, I use it in an Instructor call, but you could also call the Gemini `generateContent` endpoint directly:
# Uploads the video, waits for it to finish processing, then asks Gemini to
# describe it. Any result that is not `{:ok, uri}` from the upload or the
# wait step is returned to the caller unchanged.
def describe_video(video_path) do
  case upload_video(video_path) do
    {:ok, uploaded_uri} ->
      case wait_for_processing(uploaded_uri) do
        {:ok, ready_uri} -> generate_content(ready_uri)
        not_ready -> not_ready
      end

    upload_failure ->
      upload_failure
  end
end
# Sends a fixed "describe" prompt together with the uploaded file to the
# generateContent endpoint. Returns `{:ok, text}` when the first candidate
# holds exactly one text part, otherwise `{:error, "Failed to parse response"}`.
defp generate_content(file_uri) do
  endpoint = "#{@base_url}/v1beta/models/gemini-1.5-flash:generateContent"

  prompt_part = %{text: "Describe this video clip"}
  # The MIME type is fixed to "video/mp4" regardless of the uploaded file.
  media_part = %{file_data: %{mime_type: "video/mp4", file_uri: file_uri}}
  payload = %{contents: [%{parts: [prompt_part, media_part]}]}

  reply = Req.post!(endpoint, params: [key: get_api_key()], json: payload)

  with %{"candidates" => [first_candidate | _]} <- reply.body,
       %{"content" => %{"parts" => [%{"text" => text}]}} <- first_candidate do
    {:ok, text}
  else
    _ -> {:error, "Failed to parse response"}
  end
end