Code
rainbowR Conference Workshop
Setup
If you are not working in Posit Cloud, or the data/ directory is not showing up in Posit Cloud, you can download the workshop materials by running:
# Download workshop materials (slides + data).
# mode = "wb" forces a binary write: the short URL does not end in ".zip", so
# download.file() will not switch to binary mode by itself and the archive
# can be corrupted on Windows.
download.file(
  url = "https://tinyurl.com/rainbowrllms",
  destfile = "workshop-materials.zip",
  mode = "wb"
)

# Unzip and move contents to root
unzip("workshop-materials.zip")
file.copy(
  list.files("rainbowrworkshop-materials", full.names = TRUE),
  ".",
  recursive = TRUE
)
unlink("rainbowrworkshop-materials", recursive = TRUE)
Code
All the code and exercises for the workshop can be found below.
3 - Getting Started with LLMs in R
## Editing your .Renviron
# Opens .Renviron for editing; add your API key there, then restart R.
usethis::edit_r_environ()
## Starting a conversation
# Create a chat object bound to one model; each chat object carries its own
# conversation.
library(ellmer)
chat <- chat_openrouter(model = "google/gemini-2.5-flash")
## Sending a message to the LLM
library(ellmer)
chat <- chat_openrouter(model = "google/gemini-2.5-flash")
# The prompt is a single multi-line string: the line break is part of the text.
chat$chat("What is the most popular LGBT movie of all time?
Answer succinctly.")
## Turns
# Print the chat object to inspect the conversation so far.
chat
## Continuing the conversation
# Follow-up questions go to the same chat object, so "that movie" and "it"
# refer back to the earlier answer.
chat$chat("Who were the stars of that movie?")
chat$chat("What year was it released?")
chat
## Reset - just start a new chat
# Re-creating the chat object starts a fresh conversation; the question below
# checks whether anything from the earlier turns carried over.
chat <- chat_openrouter(model = "google/gemini-2.5-flash")
chat$chat("What were we saying earlier?")
## Training cutoff date
# Ask about the current date to see how the model handles its training cutoff.
chat$chat("what date is it right now?")
chat$chat("Are you sure about that?")
Your Turn
- Set up your API key using usethis::edit_r_environ() + restart R
- Start a conversation: chat_openrouter(model = "google/gemini-2.5-flash")
- Ask what the most popular LGBT movie was in 1995 and then 2026
4 - Prompt Engineering
## Load the data to work with
library(tidyverse)

movies <- read_csv("data/movies.csv")

# Keep only the row for "Pride" and extract its synopsis text.
pride_synopsis <- movies |>
  filter(title == "Pride") |>
  pull(synopsis)
pride_synopsis

## Ask questions
# The question and the synopsis are sent together as one user message.
chat <- chat_openrouter(model = "google/gemini-2.5-flash")
chat$chat("What year was this movie set in?", pride_synopsis)

## The LLM is very chatty!
chat <- chat_openrouter(model = "google/gemini-2.5-flash")
chat$chat("Summarise this movie:", pride_synopsis)

## Setting the system prompt
# The system prompt applies to every turn of this chat, including follow-ups.
chat <- chat_openrouter(
  model = "google/gemini-2.5-flash",
  system_prompt = "Respond in 35 words or fewer"
)
chat$chat("Summarise this movie:", pride_synopsis)
chat$chat("tell me every character and what they do")

## Run it again though...responses vary
chat <- chat_openrouter(
  model = "google/gemini-2.5-flash",
  system_prompt = "Respond in 35 words or fewer"
)
chat$chat("Summarise this movie:", pride_synopsis)
## 1. Define a role - prompt
# The system prompt gives the model a job description and a length limit.
prompt <- "You are a movie summariser whose job is to sum up the
genre and themes of movies to help users decide if they want to watch.
Respond in 50 words or fewer"

chat <- chat_openrouter(
  model = "google/gemini-2.5-flash",
  system_prompt = prompt
)
chat$chat("Summarise the movie: ", pride_synopsis)

# if we run it again we get different output
chat <- chat_openrouter(
  model = "google/gemini-2.5-flash",
  system_prompt = prompt
)
chat$chat("Summarise the movie: ", pride_synopsis)
## 2 - Defining the output format - prompt
# Same role as before, plus an "Output format" section inside the prompt that
# spells out the fields the reply should contain.
prompt <- "You are a movie summariser whose job is to sum up the
genre and themes of movies to help users decide if they want to watch.
Respond in 50 words or fewer
## Output format
Plot outline: a 1 sentence description
Genre: movie genre
Setting: time and place
"

chat <- chat_openrouter(
  model = "google/gemini-2.5-flash",
  system_prompt = prompt
)
chat$chat("Summarise the movie: ", pride_synopsis)
## 3 - Getting more specific - prompt
# Adds a "Choices" section listing the allowed genres.
# "Science Fiction" is a single genre (it matches the movie_genres vector used
# with type_enum() later in the workshop), so it must not be split by a comma.
prompt <- "You are a movie summariser whose job is to sum up the
genre and themes of movies to help users decide if they want to watch.
Respond in 50 words or fewer
## Output format
Plot outline: a 1 sentence description
Genre: movie genre (see choices below)
Setting: time and place
## Choices
Possible movie genres to choose from: Action, Comedy, Drama,
Horror, Science Fiction, Thriller, Romance, Fantasy, Adventure, Documentary
"

chat <- chat_openrouter(
  model = "google/gemini-2.5-flash",
  system_prompt = prompt
)
chat$chat("Summarise the movie: ", pride_synopsis)
## 4 - Iterate - be more strongly worded - prompt
# Same structure as the previous prompt, but the genre restriction is phrased
# as a hard requirement ("You MUST only choose ...").
# "Science Fiction" is a single genre (it matches the movie_genres vector used
# with type_enum() later in the workshop), so it must not be split by a comma.
prompt <- "You are a movie summariser whose job is to sum up the
genre and themes of movies to help users decide if they want to watch.
Respond in 50 words or fewer
## Output format
Plot outline: a 1 sentence description
Genre: movie genre (see choices below)
Setting: time and place
## Choices
You MUST only choose genres that appear on this list: Action, Comedy,
Drama, Horror, Science Fiction, Thriller, Romance, Fantasy, Adventure,
Documentary"

chat <- chat_openrouter(
  model = "google/gemini-2.5-flash",
  system_prompt = prompt
)
chat$chat("Summarise the movie: ", pride_synopsis)
Your turn
- Pick a different movie from the movie dataset and run the “best” version of the prompt on it
- Update the prompt to also extract information about the movie’s themes
- See if you can update it to show what you would want to see in a summary
5 - Extracting Structured Data
## The problem with unstructured output
# Asking for a format in the prompt still returns free text: inspect the
# result and its class to see what comes back.
chat <- chat_openrouter(model = "google/gemini-2.5-flash")
output <- chat$chat(
  "Extract year and location from this data
## Output format
Year:
Location",
  pride_synopsis
)
output
class(output)
## Extracting a single value
# A type specification describes the value we want back; chat_structured()
# is given that specification alongside the synopsis.
year_type <- type_integer("Year the movie is set in")
chat <- chat_openrouter(model = "google/gemini-2.5-flash")
chat$chat_structured(pride_synopsis, type = year_type)

## Extracting text
location_type <- type_string("Primary location of the movie")
chat <- chat_openrouter(model = "google/gemini-2.5-flash")
chat$chat_structured(pride_synopsis, type = location_type)

## Constraining to specific choices
movie_genres <- c(
  "Action", "Comedy", "Drama", "Horror",
  "Science Fiction", "Thriller", "Romance",
  "Fantasy", "Adventure", "Documentary"
)
genre_type <- type_enum(movie_genres, "The genre of the movie")
chat <- chat_openrouter(model = "google/gemini-2.5-flash")
chat$chat_structured(pride_synopsis, type = genre_type)

## Extracting multiple values (an array of strings)
# Note: this reuses the name location_type, replacing the single-string
# version defined above.
location_type <- type_array(type_string("locations of the movie"))
chat <- chat_openrouter(model = "google/gemini-2.5-flash")
chat$chat_structured(pride_synopsis, type = location_type)
## Combining multiple fields
# type_object() bundles several named fields into one specification.
year_type <- type_integer("Year the movie is set in")
genre_type <- type_enum(movie_genres, "The genre of the movie")
movie_info_type <- type_object(
  year_set = year_type,
  genre = genre_type
)
chat <- chat_openrouter(model = "google/gemini-2.5-flash")
chat$chat_structured(pride_synopsis, type = movie_info_type)

## Processing multiple movies
# One prompt per row of the movies data: paste() is vectorised over synopsis.
prompts <- paste("Extract data from this movie synopsis:", movies$synopsis)
prompts

# parallel_chat_structured is a little different: it is a function that takes
# the chat object and a list of prompts, rather than a method on the chat.
output <- parallel_chat_structured(
  chat = chat_openrouter(model = "google/gemini-2.5-flash"),
  prompts = as.list(prompts),
  type = movie_info_type
)
output
## Combining them
# Attach the extracted fields to the original movies data, column-wise.
movies_with_genres <- bind_cols(movies, output)
movies_with_genres

## Save to a file
# write_csv() needs the data frame first and the destination path second;
# this file is read back by the tool-calling examples.
readr::write_csv(movies_with_genres, "./data/movies_with_genres.csv")
Your turn
- Pick a different movie from the dataset
- Extract the year and genre using chat_structured()
- Process all movies using parallel_chat_structured()
- Bonus: Add a new field for the movie’s tone using type_enum() with choices like “Uplifting”, “Dark”, “Comedic”, “Bittersweet”
6 - Tool Calling
# Define the function
# Reads the saved movies file and returns one randomly chosen row.
sample_movies <- function() {
  all_movies <- read.csv("data/movies_with_genres.csv")
  slice_sample(all_movies, n = 1)
}

# Create tool
# Wrap the function with a description so it can be registered on a chat.
sample_movies_tool <- tool(
  sample_movies,
  description = "Randomly sample a movie from the dataset"
)
## Initialise a chat and register the tool
# The system prompt tells the model to sample a film and summarise it.
prompt <- "You are a movie recommender bot designed to help users
pick potential films to add to their watch list. Sample a
random film and give the user a 20 word summary of it."

chat <- chat_openrouter(
  model = "google/gemini-2.5-flash",
  system_prompt = prompt
)
chat$register_tool(sample_movies_tool)

## Use the tool!
chat$chat("What movie should I watch this weekend?")
## Update the function to filter the data
movie_genres <- c(
  "Action", "Comedy", "Drama", "Horror",
  "Science Fiction", "Thriller", "Romance",
  "Fantasy", "Adventure", "Documentary"
)

# Reads the saved movies file, keeps rows whose genre is in genre_choice
# (all genres by default) and returns one randomly chosen row.
sample_and_filter_movies <- function(genre_choice = movie_genres) {
  all_movies <- read.csv("data/movies_with_genres.csv")
  matching <- filter(all_movies, genre %in% genre_choice)
  slice_sample(matching, n = 1)
}

## Define tool using updated function
# The arguments list describes genre_choice as an array of values drawn from
# movie_genres.
sample_and_filter_movies_tool <- tool(
  sample_and_filter_movies,
  description = "Randomly sample a movie from the dataset",
  arguments = list(
    genre_choice = type_array(
      type_enum(
        values = movie_genres,
        "Movie genres to choose from"
      )
    )
  )
)
## Register the updated tool
# Start a fresh chat so only the filtering tool is registered on it.
prompt <- "You are a movie recommender bot designed to help users
pick potential films to add to their watch list. Sample a
random film and give the user a 20 word summary of it.
"

chat <- chat_openrouter(
  model = "google/gemini-2.5-flash",
  system_prompt = prompt
)
chat$register_tool(sample_and_filter_movies_tool)

## Ask the question
# Opens an interactive chat in the console; type the question there, e.g.:
live_console(chat)
# What movie should I watch tonight? I like scifi!
Your turn
- Try using the tool, ask different questions, and see if you can get the LLM to call it repeatedly
Wrapping up
Thanks so much for coming along! Please fill in the workshop feedback: https://forms.gle/LG3VH2o5qcNBp6aLA
If you want to learn more about using LLMs in R, I’ve started a newsletter: https://aifordatapeople.beehiiv.com/