Building a RAG with Astro, FastAPI, SurrealDB and Llama 3.1

By Fireworks AI Team | 8/14/2024

Large Language Models have revolutionized how we retrieve information and build search systems, and Retrieval-Augmented Generation (RAG) has become a common way to ground their responses in external, up-to-date information.

This guide teaches you how to build a Retrieval-Augmented Generation application using SurrealDB, Fireworks, FastAPI, and Astro. By the end of this guide, you will be able to update the chatbot's knowledge through a simple web interface and get up-to-date, personalized responses to your queries.

Prerequisites

You'll need the following:

  • A Fireworks AI account (to generate an API key).
  • Python 3.8 or later, with pip installed.
  • Node.js and npm installed.

Tech Stack

The following technologies are used in creating our RAG application:

Technology | Type | Description
FastAPI | Framework | A high-performance framework to build APIs with Python 3.8+.
Astro | Framework | Framework for building fast, modern websites with serverless backend support.
TailwindCSS | Framework | CSS framework for building custom designs.
SurrealDB | Platform | A multi-model database platform.
Fireworks | Platform | Lightning-fast inference platform to run generative AI models.

High-Level Data Flow and Operations

Here is a high-level view of how data flows through the application and the operations that take place 👇🏻

[High-level architecture diagram: highlevel-diag.png]

  • When a user asks a question, the vectors most relevant to the latest user message are queried from SurrealDB. They are combined with the user messages to create a system context, and the response is then streamed to the user from the Fireworks-hosted Llama 3.1 405B Instruct model.
  • When a user updates the system's existing knowledge, vector embeddings with metadata are created for that information and then pushed to SurrealDB.

💡 You can find the code for the application in the GitHub repo.

Step 1: Setup SurrealDB Server

The documentation describes various methods to install and run the SurrealDB server. For this guide, let's install SurrealDB using its dedicated install script. In your terminal window, execute the following command:

curl --proto '=https' --tlsv1.2 -sSf https://install.surrealdb.com | sh

The above command installs the latest version of SurrealDB (for your platform and CPU type) into the /usr/local/bin folder on your system.

Once that is done, execute the following command in your terminal window:

surreal start --log trace --user root --pass root --bind 0.0.0.0:4304 file:mydatabase.db

The above command does the following:

  • Starts the SurrealDB server at the 0.0.0.0:4304 network address.
  • Enables trace-level logging, producing verbose logs in your terminal window.
  • Sets both the username and password of the root-level user to root.
  • Creates the file mydatabase.db to persist data on your filesystem (a quick connectivity check is sketched after this list).
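
Before moving on, you can optionally verify that the server is reachable. The snippet below is a minimal sketch, assuming SurrealDB exposes its HTTP /health endpoint on the bound address and that the requests package is installed (pip install requests):

# check_surrealdb.py — quick connectivity check (illustrative, not part of the app)
import requests

# SurrealDB serves a /health endpoint over HTTP on the bound address
resp = requests.get("http://localhost:4304/health", timeout=5)
print("SurrealDB is up" if resp.ok else f"Unexpected status: {resp.status_code}")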

Step 2: Generate Fireworks AI API Key

Model inference requests to the Fireworks API require an API key. To generate this API key, log in to your Fireworks account and navigate to API Keys. Enter a name for your API key and click the Create Key button to generate a new API key. Copy and securely store this token for later use as the FIREWORKS_API_KEY environment variable.

Locally, set and export the FIREWORKS_API_KEY environment variable by executing the following command:

export FIREWORKS_API_KEY="<YOUR_FIREWORKS_API_KEY>"

Step 3: Create a new FastAPI application

First, let's start by creating a new project. You can create a new directory by executing the following command in your terminal window:

# Create and move to the new directory
mkdir chat-streaming
cd chat-streaming

Install Dependencies

Next, you can install the required dependencies by executing the following command in your terminal window:

pip install surrealdb
pip install fireworks-ai
pip install langchain langchain-community langchain_fireworks
pip install fastapi "uvicorn[standard]"

The above commands install the required libraries to run the ASGI server, FastAPI, Fireworks AI, SurrealDB and LangChain in your Python project.

Next, create a file main.py with the following code:

import uuid, os
from typing import List
 
# FastAPI
from fastapi import FastAPI
from pydantic import BaseModel
 
## Streaming Response utility
from fastapi.responses import StreamingResponse
 
## Enable CORS utility
from fastapi.middleware.cors import CORSMiddleware
 
# Fireworks SDK
import fireworks.client
 
# SurrealDB Vector Store SDK for LangChain
from langchain_community.vectorstores import SurrealDBStore
 
# Fireworks Embeddings Integration via LangChain
from langchain_fireworks import FireworksEmbeddings

The above code imports the following:

  • os module to use the environment variable you’ve set earlier.
  • uuid module to generate unique ids for the vectors inserted into SurrealDB.
  • List to denote a list of elements of a specific type.
  • BaseModel class to define the request body models of FastAPI endpoints.
  • StreamingResponse class to generate streaming responses from FastAPI endpoints.
  • CORSMiddleware FastAPI middleware to enable Cross-Origin Resource Sharing for FastAPI endpoints.
  • fireworks.client SDK for conveniently accessing Fireworks-supported LLMs.
  • SurrealDBStore class by LangChain to use SurrealDB as a vector store.
  • FireworksEmbeddings class via the LangChain Fireworks integration to use the Nomic AI embeddings model.

Define Data Models using Pydantic

To define the data types of the request bodies for your FastAPI endpoints, append the following code to the main.py file:

# Class representing the string of messages to be searched and embedded as system context.
class LearningMessages(BaseModel):
    messages: str
 
# Class representing a single message of the conversation between RAG application and user.
class Message(BaseModel):
    role: str
    content: str
 
# Class representing collection of messages above.
class Messages(BaseModel):
    messages: List[Message]

The above code defines three Pydantic models:

  • LearningMessages: a model that will store the input string with a single field called messages.
  • Message: a model that will store each message containing two fields, role and content.
  • Messages: a model that will store the input as a list of Message models (example request bodies are sketched below).
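
For illustration, here is a minimal sketch of the JSON request bodies these models validate (the values are made up for the example):

# Illustrative request bodies for the endpoints defined later (values are examples only)

# LearningMessages — a single comma-separated string (used by /update)
update_body = {"messages": "SurrealDB is a multi-model database,Fireworks runs open models"}

# Messages — a list of Message objects with role and content (used by /chat)
chat_body = {
    "messages": [
        {"role": "user", "content": "What is SurrealDB?"}
    ]
}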

Use Fireworks API Key

To set the Fireworks API key used internally by the Fireworks AI module, append the following code to the main.py file:

# Set the Fireworks API Key
fireworks.client.api_key = os.environ["FIREWORKS_API_KEY"]

The above code uses the os module to load the environment variable FIREWORKS_API_KEY as the Fireworks API key.

Use Fireworks Nomic AI Embeddings Model

To use the FireworksEmbeddings class to create an embeddings generator backed by nomic-ai/nomic-embed-text-v1.5, append the following code to the main.py file:

# Load the nomic-embed-text-v1.5 embedding models via Langchain Fireworks Integration
embeddings = FireworksEmbeddings(model="nomic-ai/nomic-embed-text-v1.5", fireworks_api_key=os.getenv("FIREWORKS_API_KEY"))
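
If you want to sanity-check the embeddings generator on its own, the snippet below is a minimal, throwaway sketch (not part of main.py) using LangChain's standard embed_query method; it assumes FIREWORKS_API_KEY is already exported:

# embeddings_check.py — throwaway sanity check, not part of main.py
import os
from langchain_fireworks import FireworksEmbeddings

embeddings = FireworksEmbeddings(
    model="nomic-ai/nomic-embed-text-v1.5",
    fireworks_api_key=os.getenv("FIREWORKS_API_KEY"),
)
vector = embeddings.embed_query("SurrealDB is a multi-model database")
print(len(vector))  # the embedding dimension (768 by default for nomic-embed-text-v1.5)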

Define SurrealDB Vector Store

To define the SurrealDB vector store configuration, append the following code to the main.py file:

dburl = "ws://localhost:4304/rpc"
db_user = "root"
db_pass = "root"
vector_collection = "vectors"
vector_db = SurrealDBStore(dburl=dburl, db_user=db_user, db_pass=db_pass, collection=vector_collection, embedding_function=embeddings)

The above code uses the following values to establish a SurrealDB Vector Store with LangChain:

  • ws://localhost:4304/rpc as the database URL to establish a WebSocket connection with SurrealDB. Using a WebSocket connection allows the application to send and receive messages from SurrealDB using the WebSocket API.
  • root as both the username and password of the SurrealDB instance.
  • vectors as the name of the collection into which vectors will be inserted and from which they will be queried.
  • The embeddings generator created earlier as the embedding function.

Initialize FastAPI App

To initialize a FastAPI application, append the following code to the main.py file:

# Initialize FastAPI App
app = FastAPI()
 
# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

The code above creates a FastAPI instance and uses the CORSMiddleware middleware to enable cross-origin requests. This allows your frontend to successfully POST to the RAG application endpoints and fetch responses to user queries, regardless of the port it is running on.
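
If you would rather not allow every origin, a minimal alternative sketch is to list only the Astro app's local address (localhost:4321 in the later steps) in place of the permissive configuration above:

# Alternative (illustrative): restrict CORS to the Astro app's origin instead of "*"
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:4321"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)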

Create a Knowledge Update API endpoint

To update the application’s knowledge in real time by generating vector embeddings and inserting them into SurrealDB, you’ll create an /update endpoint in your FastAPI application. Append the following code to the main.py file:

@app.post('/update')
async def update(messages: LearningMessages):
    messages_json = messages.model_dump()["messages"].split(',')
    # Initialize SurrealDB
    await vector_db.initialize()
    # Create texts to be inserted into the Vector Store (Embeddings are generated automatically)
    metadatas = [{"len": len(t)} for t in messages_json]
    ids = [str(uuid.uuid4()) for _ in messages_json]
    await vector_db.aadd_texts(messages_json, metadatas=metadatas, ids=ids)

The update(messages: LearningMessages) method does the following:

  • Accepts a single string, messages, containing comma-separated messages to be inserted into your SurrealDB vector store.
  • Awaits the connection setup with SurrealDB.
  • Creates a metadatas list, each item holding the length of the corresponding input message.
  • Creates an ids list, each item a randomly generated id for the corresponding input message.
  • Using the embeddings generator passed as the embedding function, generates a vector embedding for each message and inserts it, along with the message’s metadata, into the SurrealDB vector store (a sample request to this endpoint is sketched after this list).
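
For example, once the FastAPI app is running (see the "Run FastAPI App Locally" step), the minimal sketch below posts two comma-separated facts to /update; the strings are illustrative and the requests package is assumed to be installed:

# update_example.py — illustrative client for the /update endpoint
import requests

requests.post(
    "http://localhost:8000/update",
    json={"messages": "My favourite database is SurrealDB,My favourite inference platform is Fireworks"},
)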

Create a Chat API endpoint

To generate personalized responses that use the application’s existing knowledge, you’ll create a /chat endpoint in your FastAPI application. Append the following code to the main.py file:

@app.post("/chat")
async def chat(messages: Messages):
    messages_json = (messages.model_dump())['messages']
    # Initialize SurrealDB
    await vector_db.initialize()
    # Create System Context
    knowledge = "Only answer what you know. If do not know, say it's an unknown. Following are the things you know of:\n"
    relevant_content = await vector_db.asimilarity_search(messages_json[-1]['content'])
    if relevant_content:
        for each_content in relevant_content:
            knowledge += each_content.page_content
    messages_json.insert(0, { "role": "system", "content": knowledge })
    # Create a streaming Llama chat completion
    response = fireworks.client.ChatCompletion.create(
        stream=True,
        prompt_or_messages=messages_json,
        model="accounts/fireworks/models/llama-v3p1-405b-instruct",
    )
    # Stream the response back to the client
    return StreamingResponse(yield_content(response))

The chat(messages: Messages) method does the following:

  • Accepts a list of Message models as messages.
  • Awaits the connection setup with SurrealDB.
  • Defines a system prompt that restricts the model to answering only what it already knows.
  • Performs a similarity search on the latest Message, which represents the user query.
  • Loops over all similar vector embeddings and appends their content to the system prompt defined earlier.
  • Prepends a Message with the system role, whose content is the system prompt created above.
  • Uses the Fireworks Chat Completion API to stream context-aware responses from the Llama 3.1 405B Instruct model.
  • Returns a StreamingResponse using the yield_content function.

The yield_content function loops over each streamed chunk of the chat completion response and yields its content value as part of the API response.

# Function to yield content from each choice
def yield_content(response):
    for chunk in response:
        if chunk.choices[0].delta.content:
            yield chunk.choices[0].delta.content
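
To see the streaming behaviour end to end, here is a minimal sketch of a client that posts a conversation to /chat and prints the chunks as they arrive; it assumes the FastAPI app is running locally and the requests package is installed:

# chat_example.py — illustrative streaming client for the /chat endpoint
import requests

with requests.post(
    "http://localhost:8000/chat",
    json={"messages": [{"role": "user", "content": "What is my favourite database?"}]},
    stream=True,
) as resp:
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)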

With all that done, here’s how our main.py finally looks, containing both endpoints:

import uuid, os
from typing import List
 
# FastAPI
from fastapi import FastAPI
from pydantic import BaseModel
## Streaming Response utility
from fastapi.responses import StreamingResponse
## Enable CORS utility
from fastapi.middleware.cors import CORSMiddleware
 
# Fireworks SDK
import fireworks.client
 
# SurrealDB Vector Store SDK for LangChain
from langchain_community.vectorstores import SurrealDBStore
 
# Fireworks Embeddings Integration via LangChain
from langchain_fireworks import FireworksEmbeddings
 
# Class representing the string of messages to be searched and embedded as system context.
class LearningMessages(BaseModel):
    messages: str
 
# Class representing a single message of the conversation between RAG application and user.
class Message(BaseModel):
    role: str
    content: str
 
# Class representing collection of messages above.
class Messages(BaseModel):
    messages: List[Message]
 
# Set the Fireworks API Key
fireworks.client.api_key = os.getenv("FIREWORKS_API_KEY")
 
# Load the nomic-embed-text-v1.5 embedding models via Langchain Fireworks Integration
embeddings = FireworksEmbeddings(model="nomic-ai/nomic-embed-text-v1.5", fireworks_api_key=os.getenv("FIREWORKS_API_KEY"))
 
 
dburl = "ws://localhost:4304/rpc"
db_user = "root"
db_pass = "root"
vector_collection = "vectors"
vector_db = SurrealDBStore(dburl=dburl, db_user=db_user, db_pass=db_pass, collection=vector_collection, embedding_function=embeddings)
 
app = FastAPI()
 
# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
 
# Function to yield content from each choice
def yield_content(response):
    for chunk in response:
        if chunk.choices[0].delta.content:
            yield chunk.choices[0].delta.content
 
@app.post('/update')
async def update(messages: LearningMessages):
    messages_json = messages.model_dump()["messages"].split(',')
    # Initialize SurrealDB
    await vector_db.initialize()
    # Create texts to be inserted into the Vector Store (Embeddings are generated automatically)
    metadatas = [{"len": len(t)} for t in messages_json]
    ids = [str(uuid.uuid4()) for _ in messages_json]
    await vector_db.aadd_texts(messages_json, metadatas=metadatas, ids=ids)
    
@app.post("/chat")
async def chat(messages: Messages):
    messages_json = (messages.model_dump())['messages']
    # Initialize SurrealDB
    await vector_db.initialize()
    # Create System Context
    knowledge = "Only answer what you know. If do not know, say it's an unknown. Following are the things you know of:\n"
    relevant_content = await vector_db.asimilarity_search(messages_json[-1]['content'])
    if relevant_content:
        for each_content in relevant_content:
            knowledge += each_content.page_content
    messages_json.insert(0, { "role": "system", "content": knowledge })
    # Create a streaming Llama chat completion
    response = fireworks.client.ChatCompletion.create(
        stream=True,
        prompt_or_messages=messages_json,
        model="accounts/fireworks/models/llama-v3p1-405b-instruct",
    )
    # Stream the response back to the client
    return StreamingResponse(yield_content(response))

Run FastAPI App Locally

Execute the following command in another terminal window:

uvicorn main:app --reload

💡 Use a Python virtual environment to avoid conflicts with other packages. If your environment lives in ./venv, run ./venv/bin/uvicorn main:app --reload to make sure the local environment’s packages are used instead of the global ones.

The app should be running on localhost:8000. Let’s keep it running while we create a user interface that invokes these endpoints and renders responses to user queries.

Create a new Astro application

Let’s get started by creating a new Astro project. Open your terminal and run the following command:

npm create astro@latest chat-ui

npm create astro is the recommended way to scaffold an Astro project quickly.

When prompted, choose the following:

  • Empty when prompted how to start the new project.
  • Yes when prompted whether to write TypeScript.
  • Strict when prompted how strict TypeScript should be.
  • Yes when prompted whether to install dependencies.
  • Yes when prompted whether to initialize a git repository.

Once that’s done, you can move into the project directory and start the app:

cd chat-ui
npm run dev

The app should be running on localhost:4321. Let's stop the development server as we move on to integrating TailwindCSS into the application.

Add Tailwind CSS to the application

For styling the app, you will use Tailwind CSS. Install and set up Tailwind CSS at the root of your project's directory by running:

npx astro add tailwind

When prompted, choose:

  • Yes when prompted to install the Tailwind dependencies.
  • Yes when prompted to generate a minimal tailwind.config.mjs file.
  • Yes when prompted to make changes to the Astro configuration file.

With those choices, the command finishes integrating TailwindCSS into your Astro project. It installs the following dependencies:

  • tailwindcss: the TailwindCSS package that scans your project files and generates the corresponding styles.
  • @astrojs/tailwind: The adapter that brings Tailwind's utility CSS classes to every .astro file and framework component in your project.

To create reactive interfaces quickly, let’s move on to integrating React in your application.

Integrate React in your Astro project

To prototype the reactive user interface quickly, you will use React with Astro. In your terminal window, execute the following command:

npx astro add react

npx allows us to execute npm package binaries without having to install them globally first.

When prompted, choose the following:

  • Yes when prompted whether to install the React dependencies.
  • Yes when prompted whether to make changes to the Astro configuration file.
  • Yes when prompted whether to make changes to the tsconfig.json file.

To build the conversation user interface easily, let’s move on to installing an AI SDK in your application.

Install an AI SDK and Axios

In your terminal window, run the command below to install the necessary libraries for building the conversation user interface:

npm install ai axios

The above command installs the following:

  • ai library to build AI-powered streaming text and chat UIs.
  • axios library to make HTTP requests.

Build Conversation User Interface

Inside the src directory, create a Chat.jsx file with the following code:

// File: src/Chat.jsx
 
import { useChat } from 'ai/react'
 
export default function () {
  const { messages, handleSubmit, input, handleInputChange } = useChat({
    api: 'http://localhost:8000/chat',
  })
  return (
    <form className="mt-12 flex w-full max-w-[300px] flex-col" onSubmit={handleSubmit}>
      <input
        id="input"
        name="prompt"
        value={input}
        onChange={handleInputChange}
        placeholder="What's your next question?"
        className="mt-3 rounded border px-2 py-1 outline-none focus:border-black"
      />
      <button className="mt-3 max-w-max rounded border px-3 py-1 outline-none hover:bg-black hover:text-white" type="submit">
        Ask &rarr;
      </button>
      {messages.map((message, i) => (
        <div className="mt-3 border-t pt-3" key={i}>
          {message.content}
        </div>
      ))}
    </form>
  )
}

Chat.jsx does the following:

  • Imports the useChat hook from the ai SDK to manage the conversation between the user and the application. It takes care of saving the entire conversation (on the client side) and using it as the request body when calling the user-defined api endpoint to fetch the chatbot’s response.
  • Exports a React component that returns a form containing an <input> element to allow users to enter their query. It then loops over all the messages in the entire conversation, including the latest response to the user query.

Now, let’s create a component that will allow the user to supply strings that the application takes into consideration before it answers any user query.

Build User Interface to Update Application’s Knowledge

Inside the src directory, create an Update.jsx file with the following code:

// File: src/Update.jsx
 
import axios from 'axios'
import { useState } from 'react'
 
export default function () {
  const [messages, setMessages] = useState('')
  return (
    <form
      className="mt-12 flex w-full max-w-[300px] flex-col"
      onSubmit={(e) => {
        e.preventDefault()
        axios.post('http://localhost:8000/update', {
          messages,
        })
      }}
    >
      <textarea
        value={messages}
        id="learn_messages"
        name="learn_messages"
        onChange={(e) => setMessages(e.target.value)}
        placeholder="Things to learn [seperated by comma (,)]"
        className="mt-3 rounded border px-2 py-1 outline-none focus:border-black"
      />
      <button className="mt-3 max-w-max rounded border px-3 py-1 outline-none hover:bg-black hover:text-white" type="submit">
        Learn &rarr;
      </button>
    </form>
  )
}

Update.jsx -

  • Imports the axios library and React's useState hook.
  • Exports a React component that returns a form containing a <textarea> element to allow users to enter multiple strings separated by commas.
  • Upon form submission, it POSTs the messages as JSON to the http://localhost:8000/update endpoint.

To use the React components on the home page of your Astro application, make the following changes in the src/pages/index.astro file:
---
+ import Chat from '../Chat'
+ import Update from '../Update'
---
 
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <link rel="icon" type="image/svg+xml" href="/favicon.svg" />
    <meta name="viewport" content="width=device-width" />
    <meta name="generator" content={Astro.generator} />
    <title>Astro</title>
  </head>
  <body class="flex w-screen flex-col items-center">
-   <h1>Astro</h1>
+   <Update client:load />
+   <Chat client:load />
  </body>
</html>

The changes above begin with importing both the Chat and Update React components. Then, they use Astro's client:load directive to make sure that both components are loaded and hydrated immediately on the page.

Run Astro Application Locally

Run your Astro application by executing the following command in another terminal window:

npm run build && npm run preview

The app should be running on localhost:4321.

Conclusion

Congratulations, you created a Retrieval-Augmented Generation application using SurrealDB and Fireworks AI. With SurrealDB’s vector store, you can insert and update vector embeddings on the fly over WebSockets and perform similarity searches against user queries, with the vector embeddings generated for you behind the scenes.

Further, using Fireworks AI, you can invoke the Llama 3.1 405B Instruct model with system context and generate personalized responses to user queries.