스프링부트에서 Ollama 사용하기

스프링부트에서 Ollama 사용하기

스프링부트 애플리케이션에서 Ollama를 통합하여 로컬 LLM을 사용하는 방법을 안내합니다.

1. Ollama 서버 준비

먼저 Ollama를 설치하고 모델을 준비합니다.

# Ollama 설치 (macOS/Linux)
curl -fsSL https://ollama.com/install.sh | sh

# 모델 다운로드
ollama pull llama2:13b

# Ollama 서버 실행 (기본 포트: 11434)
ollama serve

2. Spring Boot 프로젝트 설정

build.gradle

dependencies {
implementation 'org.springframework.boot:spring-boot-starter-web'
implementation 'org.springframework.boot:spring-boot-starter-webflux'
compileOnly 'org.projectlombok:lombok'
annotationProcessor 'org.projectlombok:lombok'
}

application.yml

ollama:
base-url: http://localhost:11434
model: llama2:13b
timeout: 300 # seconds

spring:
application:
name: ollama-demo

3. Ollama 클라이언트 구현

방법 1: RestTemplate 사용

OllamaProperties.java

package com.example.config;

import lombok.Getter;
import lombok.Setter;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;

/**
 * Connection settings for the Ollama server, bound from configuration
 * properties under the {@code ollama} prefix.
 *
 * <p>{@code baseUrl} and {@code timeout} carry defaults so that consumers
 * calling {@code Duration.ofSeconds(getTimeout())} do not fail with a
 * {@link NullPointerException} when the property is omitted.
 */
@Getter
@Setter
@Component
@ConfigurationProperties(prefix = "ollama")
public class OllamaProperties {

    /** Base URL of the Ollama server; defaults to a local server on port 11434. */
    private String baseUrl = "http://localhost:11434";

    /** Name of the model sent with every request, e.g. {@code llama2:13b}. No default. */
    private String model;

    /** Response timeout in seconds; defaults to 300 because local generation can be slow. */
    private Integer timeout = 300;
}

OllamaRequest.java

package com.example.dto;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.util.LinkedHashMap;
import java.util.Map;

@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class OllamaRequest {
private String model;
private String prompt;
private Boolean stream;

@JsonProperty("num_ctx")
private Integer numCtx; // context window size

private Double temperature;

@JsonProperty("num_predict")
private Integer numPredict; // max tokens to generate
}

OllamaResponse.java

package com.example.dto;

import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Data;

/**
 * Response body mapped from Ollama's generate endpoint.
 * Snake-case JSON properties are bound to camel-case fields via {@code @JsonProperty}.
 */
@Data
public class OllamaResponse {
// Model name echoed back in the response.
private String model;

// Creation timestamp, kept as the raw string from the "created_at" property.
@JsonProperty("created_at")
private String createdAt;

// Generated text; this is the value the services return to callers.
private String response;

// Completion flag from the "done" property.
private Boolean done;

// Total duration; OllamaService divides it by 1_000_000 to log milliseconds,
// so the unit is presumably nanoseconds -- confirm against the Ollama API docs.
@JsonProperty("total_duration")
private Long totalDuration;

// Value of "load_duration"; presumably model load time in the same unit as
// totalDuration -- TODO confirm.
@JsonProperty("load_duration")
private Long loadDuration;

// Value of "prompt_eval_count"; presumably the number of prompt tokens -- TODO confirm.
@JsonProperty("prompt_eval_count")
private Integer promptEvalCount;

// Value of "eval_count"; presumably the number of generated tokens -- TODO confirm.
@JsonProperty("eval_count")
private Integer evalCount;
}

OllamaService.java

package com.example.service;

import com.example.config.OllamaProperties;
import com.example.dto.OllamaRequest;
import com.example.dto.OllamaResponse;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.web.client.RestTemplateBuilder;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;

import java.time.Duration;

/**
 * Blocking client for Ollama's {@code /api/generate} endpoint built on {@link RestTemplate}.
 *
 * <p>The class declares its constructor explicitly and therefore must not also
 * carry {@code @RequiredArgsConstructor}: Lombok would generate a second
 * constructor taking a {@code RestTemplate}, leaving Spring with two
 * un-annotated constructors and no default one to instantiate the bean with.
 */
@Slf4j
@Service
public class OllamaService {

    /** Read timeout used when {@code ollama.timeout} is not configured. */
    private static final int DEFAULT_TIMEOUT_SECONDS = 300;

    /** Divisor turning the reported total duration into milliseconds for logging. */
    private static final long DURATION_UNITS_PER_MILLI = 1_000_000L;

    private final OllamaProperties ollamaProperties;
    private final RestTemplate restTemplate;

    /**
     * Builds a dedicated {@link RestTemplate} with a 10 second connect timeout
     * and a read timeout taken from the configured properties.
     *
     * @param ollamaProperties server URL, model name and timeout
     * @param builder          Spring Boot's pre-configured template builder
     */
    public OllamaService(OllamaProperties ollamaProperties, RestTemplateBuilder builder) {
        this.ollamaProperties = ollamaProperties;

        // The timeout is a nullable Integer; unboxing a missing value would throw.
        Integer configuredTimeout = ollamaProperties.getTimeout();
        int readTimeoutSeconds = configuredTimeout != null ? configuredTimeout : DEFAULT_TIMEOUT_SECONDS;

        this.restTemplate = builder
                .setConnectTimeout(Duration.ofSeconds(10))
                .setReadTimeout(Duration.ofSeconds(readTimeoutSeconds))
                .build();
    }

    /**
     * Generates a completion for the prompt with a single non-streaming request.
     *
     * @param prompt prompt text
     * @return the generated text, or {@code null} when the server sent no body
     */
    public String generate(String prompt) {
        return generate(prompt, false);
    }

    /**
     * Generates a completion for the prompt.
     *
     * <p>A streamed reply arrives as a sequence of JSON objects, which a single
     * {@code postForObject} call mapped to one {@link OllamaResponse} cannot
     * represent. The request is therefore always sent with {@code stream=false};
     * use the WebClient-based service for real streaming.
     *
     * @param prompt prompt text
     * @param stream ignored apart from a warning; kept for backward compatibility
     * @return the generated text, or {@code null} when the server sent no body
     */
    public String generate(String prompt, boolean stream) {
        if (stream) {
            log.warn("Streaming is not supported by the blocking client; sending a non-streaming request");
        }

        String url = ollamaProperties.getBaseUrl() + "/api/generate";

        OllamaRequest request = OllamaRequest.builder()
                .model(ollamaProperties.getModel())
                .prompt(prompt)
                .stream(false)
                .temperature(0.7)
                .numCtx(4096)
                .build();

        HttpHeaders headers = new HttpHeaders();
        headers.setContentType(MediaType.APPLICATION_JSON);

        HttpEntity<OllamaRequest> entity = new HttpEntity<>(request, headers);

        log.info("Sending request to Ollama: {}", prompt);

        OllamaResponse response = restTemplate.postForObject(url, entity, OllamaResponse.class);
        if (response == null) {
            return null;
        }

        // total_duration is optional in the payload; avoid unboxing a null Long.
        Long totalDuration = response.getTotalDuration();
        if (totalDuration != null) {
            log.info("Received response from Ollama in {}ms", totalDuration / DURATION_UNITS_PER_MILLI);
        } else {
            log.info("Received response from Ollama");
        }
        return response.getResponse();
    }
}

방법 2: WebClient 사용 (비동기)

OllamaWebClientService.java

package com.example.service;

import com.example.config.OllamaProperties;
import com.example.dto.OllamaRequest;
import com.example.dto.OllamaResponse;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.web.reactive.function.client.WebClient;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;

import java.time.Duration;

/**
 * Non-blocking client for Ollama's {@code /api/generate} endpoint built on {@link WebClient}.
 *
 * <p>The class declares its constructor explicitly and therefore must not also
 * carry {@code @RequiredArgsConstructor}: Lombok would generate a second
 * constructor taking a {@code WebClient}, leaving Spring with two un-annotated
 * constructors and no default one to instantiate the bean with.
 */
@Slf4j
@Service
public class OllamaWebClientService {

    /** Response timeout used when {@code ollama.timeout} is not configured. */
    private static final int DEFAULT_TIMEOUT_SECONDS = 300;

    private final OllamaProperties ollamaProperties;
    private final WebClient webClient;

    /**
     * Creates a {@link WebClient} rooted at the configured Ollama base URL.
     *
     * @param ollamaProperties server URL, model name and timeout
     */
    public OllamaWebClientService(OllamaProperties ollamaProperties) {
        this.ollamaProperties = ollamaProperties;
        this.webClient = WebClient.builder()
                .baseUrl(ollamaProperties.getBaseUrl())
                .build();
    }

    /**
     * Requests a complete (non-streamed) generation without blocking the caller.
     *
     * @param prompt prompt text
     * @return a {@link Mono} emitting the generated text, or failing with a
     *         timeout error when no response arrives within the configured time
     */
    public Mono<String> generateAsync(String prompt) {
        OllamaRequest request = OllamaRequest.builder()
                .model(ollamaProperties.getModel())
                .prompt(prompt)
                .stream(false)
                .temperature(0.7)
                .build();

        // The timeout is a nullable Integer; unboxing a missing value would throw.
        Integer configuredTimeout = ollamaProperties.getTimeout();
        int timeoutSeconds = configuredTimeout != null ? configuredTimeout : DEFAULT_TIMEOUT_SECONDS;

        return webClient.post()
                .uri("/api/generate")
                .bodyValue(request)
                .retrieve()
                .bodyToMono(OllamaResponse.class)
                .timeout(Duration.ofSeconds(timeoutSeconds))
                .map(OllamaResponse::getResponse)
                .doOnSuccess(response -> log.info("Async response received"))
                .doOnError(error -> log.error("Error during async call", error));
    }

    /**
     * Requests a streamed generation and exposes each partial response as it arrives.
     *
     * @param prompt prompt text
     * @return a {@link Flux} emitting the text fragment of every streamed chunk
     */
    public Flux<String> generateStream(String prompt) {
        OllamaRequest request = OllamaRequest.builder()
                .model(ollamaProperties.getModel())
                .prompt(prompt)
                .stream(true)
                .temperature(0.7)
                .build();

        return webClient.post()
                .uri("/api/generate")
                .bodyValue(request)
                .retrieve()
                .bodyToFlux(OllamaResponse.class)
                .map(OllamaResponse::getResponse)
                .doOnNext(chunk -> log.debug("Stream chunk received: {}", chunk));
    }
}

4. REST 컨트롤러 구현

OllamaController.java

package com.example.controller;

import com.example.service.OllamaService;
import com.example.service.OllamaWebClientService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.*;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;

import java.util.Map;

@Slf4j
@RestController
@RequestMapping("/api/ollama")
@RequiredArgsConstructor
public class OllamaController {

private final OllamaService ollamaService;
private final OllamaWebClientService ollamaWebClientService;

@PostMapping("/generate")
public Map<String, String> generate(@RequestBody Map<String, String> request) {
String prompt = request.get("prompt");
log.info("Received prompt: {}", prompt);

String response = ollamaService.generate(prompt);

return Map.of("response", response);
}

@PostMapping("/generate/async")
public Mono<Map<String, String>> generateAsync(@RequestBody Map<String, String> request) {
String prompt = request.get("prompt");

return ollamaWebClientService.generateAsync(prompt)
.map(response -> Map.of("response", response));
}

@PostMapping(value = "/generate/stream", produces = MediaType.TEXT_EVENT_STREAM_VALUE)
public Flux<String> generateStream(@RequestBody Map<String, String> request) {
String prompt = request.get("prompt");

return ollamaWebClientService.generateStream(prompt);
}
}

5. Chat API 활용

ChatMessage.java

package com.example.dto;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * One message of a chat conversation, used both in chat requests and as the
 * message carried by a chat response.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class ChatMessage {
// Speaker of the message: one of "system", "user" or "assistant".
private String role; // system, user, assistant
// Text of the message.
private String content;
}

ChatRequest.java

package com.example.dto;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.util.List;

@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class ChatRequest {
private String model;
private List<ChatMessage> messages;
private Boolean stream;
private Double temperature;
}

ChatResponse.java

package com.example.dto;

import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Data;

/**
 * Response body mapped from Ollama's chat endpoint.
 */
@Data
public class ChatResponse {
// Model name echoed back in the response.
private String model;
// Reply message; OllamaChatService reads its content as the assistant's answer.
private ChatMessage message;
// Completion flag from the "done" property.
private Boolean done;

// Value of "total_duration"; unit not visible here -- presumably the same as in
// the generate response, TODO confirm.
@JsonProperty("total_duration")
private Long totalDuration;
}

OllamaChatService.java

package com.example.service;

import com.example.config.OllamaProperties;
import com.example.dto.ChatMessage;
import com.example.dto.ChatRequest;
import com.example.dto.ChatResponse;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;

import java.util.ArrayList;
import java.util.List;

/**
 * Chat client for Ollama's {@code /api/chat} endpoint that keeps the running
 * conversation in memory.
 *
 * <p>The bean is a singleton, so the history is shared by every caller and may
 * be touched by concurrent requests. All access to it is guarded by the list's
 * monitor; the lock is never held during the HTTP call itself.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class OllamaChatService {

    private final OllamaProperties ollamaProperties;
    private final RestTemplate restTemplate;

    /** Conversation so far; guarded by its own monitor. */
    private final List<ChatMessage> conversationHistory = new ArrayList<>();

    /**
     * Sends the user message together with the stored history and records the exchange.
     *
     * <p>The user message and the reply are appended to the history only after
     * a successful call, so a failed or empty response does not leave an
     * unanswered user turn behind.
     *
     * @param userMessage text of the new user turn
     * @return the assistant's reply, or {@code null} when the server returned no message
     */
    public String chat(String userMessage) {
        String url = ollamaProperties.getBaseUrl() + "/api/chat";

        ChatMessage userTurn = new ChatMessage("user", userMessage);

        // Work on a snapshot so the slow network call runs outside the lock.
        List<ChatMessage> messages;
        synchronized (conversationHistory) {
            messages = new ArrayList<>(conversationHistory);
        }
        messages.add(userTurn);

        ChatRequest request = ChatRequest.builder()
                .model(ollamaProperties.getModel())
                .messages(messages)
                .stream(false)
                .temperature(0.7)
                .build();

        ChatResponse response = restTemplate.postForObject(url, request, ChatResponse.class);

        if (response == null || response.getMessage() == null) {
            return null;
        }

        String assistantMessage = response.getMessage().getContent();

        // Commit both turns together so the history always alternates correctly.
        synchronized (conversationHistory) {
            conversationHistory.add(userTurn);
            conversationHistory.add(new ChatMessage("assistant", assistantMessage));
        }

        return assistantMessage;
    }

    /** Removes every stored message, including any system prompt. */
    public void clearHistory() {
        synchronized (conversationHistory) {
            conversationHistory.clear();
        }
    }

    /**
     * Starts a new conversation that begins with the given system prompt.
     * Any previously stored messages are discarded.
     *
     * @param systemPrompt instructions placed as the first message
     */
    public void setSystemPrompt(String systemPrompt) {
        synchronized (conversationHistory) {
            conversationHistory.clear();
            conversationHistory.add(new ChatMessage("system", systemPrompt));
        }
    }
}

6. 실전 활용 예시

1) 텍스트 요약 서비스

/**
 * Summarizes free text by wrapping it in a fixed instruction prompt and
 * delegating to {@link OllamaService}.
 */
@Service
@RequiredArgsConstructor
public class TextSummarizationService {

    /** Prompt template; the single placeholder receives the text to summarize. */
    private static final String SUMMARY_PROMPT_TEMPLATE =
            "다음 텍스트를 3문장으로 요약해주세요:\n\n%s";

    private final OllamaService ollamaService;

    /**
     * Asks the model for a three-sentence summary of the given text.
     *
     * @param text text to summarize
     * @return whatever {@code OllamaService.generate} returns for the built prompt
     */
    public String summarize(String text) {
        final String summaryPrompt = String.format(SUMMARY_PROMPT_TEMPLATE, text);
        return ollamaService.generate(summaryPrompt);
    }
}

2) 코드 리뷰 서비스

/**
 * Requests code reviews from the chat model, after priming the chat service
 * with a reviewer system prompt at start-up.
 */
@Service
@RequiredArgsConstructor
public class CodeReviewService {

    /** System prompt installed on the chat service once the bean is constructed. */
    private static final String REVIEWER_SYSTEM_PROMPT =
            "You are an expert code reviewer. " +
            "Analyze the code and provide constructive feedback.";

    /** Review prompt; placeholders are language, fence language and the code itself. */
    private static final String REVIEW_PROMPT_TEMPLATE =
            "Review this %s code:\n\n```%s\n%s\n```";

    private final OllamaChatService chatService;

    /** Installs the reviewer system prompt; this resets the chat service's history. */
    @PostConstruct
    public void init() {
        chatService.setSystemPrompt(REVIEWER_SYSTEM_PROMPT);
    }

    /**
     * Sends the code to the chat model inside a fenced block tagged with its language.
     *
     * @param code     source code to review
     * @param language language name, used in the instruction and as the fence tag
     * @return the reply produced by the chat service
     */
    public String reviewCode(String code, String language) {
        final String reviewPrompt = String.format(REVIEW_PROMPT_TEMPLATE, language, language, code);
        return chatService.chat(reviewPrompt);
    }
}

3) Q&A 챗봇

/**
 * Question-answering facade over {@link OllamaChatService}.
 *
 * <p>NOTE(review): the per-user session map is declared but never read or
 * written, and {@code userId} is not used, so every caller currently shares
 * the chat service's single conversation.
 */
@Service
@RequiredArgsConstructor
public class QnAService {

    private final OllamaChatService chatService;

    /** Intended per-user message history; not consulted by the current code. */
    private final Map<String, List<ChatMessage>> userSessions = new ConcurrentHashMap<>();

    /**
     * Forwards the question to the chat service.
     *
     * @param userId   identifier of the asking user; currently unused
     * @param question question text
     * @return the reply produced by the chat service
     */
    public String answer(String userId, String question) {
        // Per-user session handling is the stated intent but is not implemented here.
        final String reply = chatService.chat(question);
        return reply;
    }
}

7. 예외 처리 및 에러 핸들링

GlobalExceptionHandler.java

package com.example.exception;

import lombok.extern.slf4j.Slf4j;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ExceptionHandler;
import org.springframework.web.bind.annotation.RestControllerAdvice;
import org.springframework.web.client.ResourceAccessException;
import org.springframework.web.client.RestClientException;

import java.util.Map;

/**
 * Translates failures of calls to Ollama into HTTP error responses.
 *
 * <p>An upstream timeout is reported as 504 Gateway Timeout (408 would blame
 * the client's own request); every other communication failure is 503.
 */
@Slf4j
@RestControllerAdvice
public class GlobalExceptionHandler {

    /**
     * Handles I/O failures raised by {@code RestTemplate}.
     *
     * <p>{@link ResourceAccessException} wraps any I/O error, including a
     * refused connection, so the cause is inspected to tell a real timeout
     * apart from an unreachable server.
     *
     * @param ex the I/O failure
     * @return 504 when the cause is a socket timeout, otherwise 503
     */
    @ExceptionHandler(ResourceAccessException.class)
    public ResponseEntity<Map<String, String>> handleTimeout(ResourceAccessException ex) {
        if (ex.getCause() instanceof java.net.SocketTimeoutException) {
            log.error("Ollama request timeout", ex);
            return ResponseEntity.status(HttpStatus.GATEWAY_TIMEOUT)
                    .body(Map.of("error", "Request timeout. The model took too long to respond."));
        }

        log.error("Error communicating with Ollama", ex);
        return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE)
                .body(Map.of("error", "Ollama service is unavailable."));
    }

    /**
     * Handles the timeout signalled by the reactive client's {@code timeout} operator.
     *
     * @param ex the timeout raised on the asynchronous path
     * @return 504 with the same body as a blocking-client timeout
     */
    @ExceptionHandler(java.util.concurrent.TimeoutException.class)
    public ResponseEntity<Map<String, String>> handleAsyncTimeout(java.util.concurrent.TimeoutException ex) {
        log.error("Ollama request timeout", ex);
        return ResponseEntity.status(HttpStatus.GATEWAY_TIMEOUT)
                .body(Map.of("error", "Request timeout. The model took too long to respond."));
    }

    /**
     * Handles every other {@code RestTemplate} failure, such as error status codes.
     *
     * @param ex the client failure
     * @return 503 with a generic unavailability message
     */
    @ExceptionHandler(RestClientException.class)
    public ResponseEntity<Map<String, String>> handleRestClientException(RestClientException ex) {
        log.error("Error communicating with Ollama", ex);
        return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE)
                .body(Map.of("error", "Ollama service is unavailable."));
    }
}

8. 테스트

OllamaServiceTest.java

package com.example.service;

import com.example.config.OllamaProperties;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Integration test for {@link OllamaService}.
 *
 * <p>It loads the full application context and sends a real request, so the
 * configured Ollama server must be reachable while it runs.
 */
@SpringBootTest
class OllamaServiceTest {

    @Autowired
    private OllamaService ollamaService;

    /** A factual question should yield a non-null answer that mentions the expected city. */
    @Test
    void testGenerate() {
        final String answer = ollamaService.generate("What is the capital of France?");

        assertThat(answer)
                .isNotNull()
                .containsIgnoringCase("Paris");
    }
}

9. 성능 최적화

Connection Pool 설정

/**
 * Provides the shared {@link RestTemplate} bean backed by a pooled Apache HTTP client.
 */
@Configuration
public class RestTemplateConfig {

    /**
     * Builds a {@link RestTemplate} whose connections come from a pool capped at
     * 100 connections in total and 20 per route.
     *
     * @return the configured template
     */
    @Bean
    public RestTemplate restTemplate() {
        // Pooled connection manager shared by every request made through the template.
        PoolingHttpClientConnectionManager pool = new PoolingHttpClientConnectionManager();
        pool.setMaxTotal(100);
        pool.setDefaultMaxPerRoute(20);

        CloseableHttpClient pooledClient = HttpClients.custom()
                .setConnectionManager(pool)
                .build();

        // Timeouts are set before the client is swapped in, in the original call order.
        HttpComponentsClientHttpRequestFactory requestFactory =
                new HttpComponentsClientHttpRequestFactory();
        requestFactory.setConnectTimeout(10000);
        requestFactory.setReadTimeout(300000);
        requestFactory.setHttpClient(pooledClient);

        return new RestTemplate(requestFactory);
    }
}

캐싱 적용

/**
 * Caching front for {@link OllamaService}: results are stored in the
 * {@code ollama-responses} cache keyed by the prompt.
 */
@Service
@RequiredArgsConstructor
public class CachedOllamaService {

    private final OllamaService ollamaService;

    /**
     * Generates a completion for the prompt through the caching annotation.
     *
     * @param prompt prompt text, also used as the cache key
     * @return the text returned by {@code OllamaService.generate}
     */
    @Cacheable(value = "ollama-responses", key = "#prompt")
    public String generateWithCache(String prompt) {
        final String generated = ollamaService.generate(prompt);
        return generated;
    }
}

10. Docker Compose 설정

version: '3.8'

services:
ollama:
image: ollama/ollama:latest
ports:
- "11434:11434"
volumes:
- ollama-data:/root/.ollama
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]

spring-app:
build: .
ports:
- "8080:8080"
environment:
- OLLAMA_BASE_URL=http://ollama:11434
depends_on:
- ollama

volumes:
ollama-data:

참고 자료

마무리

스프링부트에서 Ollama를 사용하면:

  • 외부 API 비용 없이 로컬에서 LLM 활용 가능
  • 데이터 프라이버시 보장
  • 외부 API 호출에 따른 네트워크 왕복 지연 없음 (단, 실제 응답 속도는 로컬 하드웨어 성능에 좌우됨)
  • 다양한 오픈소스 모델 활용 가능

다만 충분한 서버 리소스(RAM, GPU)가 필요하므로 운영 환경에서는 리소스 요구사항을 고려해야 합니다.

Share