Categories: Generate Text

Phixtral inference

Code to run mlabonne/phixtral-2x2_8 and mlabonne/phixtral-4x2_8 in 4-bit precision.

mlabonne/phixtral-2x2_8 (https://huggingface.co/mlabonne/phixtral-2x2_8) is recommended; mlabonne/phixtral-4x2_8 (https://huggingface.co/mlabonne/phixtral-4x2_8) can also be selected in the code below.

pip install -qqq --upgrade transformers einops accelerate bitsandbytes --progress-bar off
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "phixtral-2x2_8" # @param ["phixtral-2x2_8", "phixtral-4x2_8"]
instruction = "Write an epic poem about Ancient Rome." # @param {type:"string"}

prompt = f'''
<|im_start|>system
You are Phixtral, a helpful AI assistant.<|im_end|>
<|im_start|>user
{instruction}<|im_end|>
<|im_start|>assistant
'''

torch.set_default_device("cuda")

# Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    f"mlabonne/{model_name}",
    torch_dtype="auto",
    load_in_4bit=True,
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(
    f"mlabonne/{model_name}",
    trust_remote_code=True
)

# Tokenize the input string
inputs = tokenizer(
    prompt,
    return_tensors="pt",
    return_attention_mask=False
)

# Generate text using the model
outputs = model.generate(**inputs, max_length=200)

# Decode and print the output
text = tokenizer.batch_decode(outputs)[0]
print(text[len(prompt):])
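
To stream tokens as they are generated (useful in a notebook), transformers provides TextStreamer, which can be passed to generate(). A minimal sketch, assuming the model, tokenizer and prompt defined above:

# Optional: stream the output token by token (assumes model, tokenizer and prompt from above)
from transformers import TextStreamer

streamer = TextStreamer(tokenizer, skip_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)
_ = model.generate(**inputs, max_length=200, streamer=streamer)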
Categories: RAG

Summarise Legal Documents using Mixtral + Jina AI

Prerequisites

  • You need a Jina AI key – [get a free one here](https://jina.ai/embeddings/).
  • You also need a [Hugging Face access token](https://huggingface.co/docs/hub/security-tokens).
pip install haystack-ai jina-haystack chroma-haystack pypdf transformers
export JINA_API_KEY=xxxxx
export HUGGINGFACE_API_KEY=xxxxx
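
If you are working in a notebook instead of a shell, you can set the same keys from Python before running the code below (placeholder values shown, as above):

import os
os.environ["JINA_API_KEY"] = "xxxxx"         # replace with your Jina AI key
os.environ["HUGGINGFACE_API_KEY"] = "xxxxx"  # replace with your Hugging Face token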
import os

from chroma_haystack.document_store import ChromaDocumentStore
from haystack import Pipeline
from haystack.components.fetchers import LinkContentFetcher
from haystack.components.converters import PyPDFToDocument
from haystack.components.writers import DocumentWriter
from haystack.components.preprocessors import DocumentCleaner
from haystack.components.preprocessors import DocumentSplitter
from chroma_haystack.retriever import ChromaEmbeddingRetriever
from haystack.document_stores import DuplicatePolicy
from jina_haystack.document_embedder import JinaDocumentEmbedder
from jina_haystack.text_embedder import JinaTextEmbedder

jina_api_key = os.environ.get('JINA_API_KEY')
hf_token = os.environ.get('HUGGINGFACE_API_KEY')

document_store = ChromaDocumentStore()
fetcher = LinkContentFetcher()
converter = PyPDFToDocument()

cleaner = DocumentCleaner(remove_repeated_substrings=True)
splitter = DocumentSplitter(split_by="word", split_length=500)
writer = DocumentWriter(document_store=document_store, policy=DuplicatePolicy.SKIP)
retriever = ChromaEmbeddingRetriever(document_store=document_store)
document_embedder = JinaDocumentEmbedder(api_key=jina_api_key, model_name="jina-embeddings-v2-base-en")

# Indexing pipeline

indexing_pipeline = Pipeline()
indexing_pipeline.add_component(instance=fetcher, name="fetcher")
indexing_pipeline.add_component(instance=converter, name="converter")
indexing_pipeline.add_component(instance=cleaner, name="cleaner")
indexing_pipeline.add_component(instance=splitter, name="splitter")
indexing_pipeline.add_component(instance=document_embedder, name="embedder")
indexing_pipeline.add_component(instance=writer, name="writer")

indexing_pipeline.connect("fetcher.streams", "converter.sources")
indexing_pipeline.connect("converter.documents", "cleaner.documents")
indexing_pipeline.connect("cleaner.documents", "splitter.documents")
indexing_pipeline.connect("splitter.documents", "embedder.documents")
indexing_pipeline.connect("embedder.documents", "writer.documents")

urls = ["https://cases.justia.com/federal/district-courts/california/candce/3:2020cv06754/366520/813/0.pdf"]

indexing_pipeline.run(data={"fetcher": {"urls": urls}})

# RAG pipeline

from haystack.components.generators import HuggingFaceTGIGenerator
from haystack.components.builders.prompt_builder import PromptBuilder

from jina_haystack.text_embedder import JinaTextEmbedder
prompt = """ Answer the question, based on the
content in the documents. If you can't answer based on the documents, say so.

Documents:
{% for doc in documents %}
  {{doc.content}}
{% endfor %}

question: {{question}}
"""

text_embedder = JinaTextEmbedder(api_key=jina_api_key, model_name="jina-embeddings-v2-base-en")
generator = HuggingFaceTGIGenerator("mistralai/Mixtral-8x7B-Instruct-v0.1", token=hf_token)
generator.warm_up()

prompt_builder = PromptBuilder(template=prompt)
rag = Pipeline()
rag.add_component("text_embedder",text_embedder)
rag.add_component(instance=prompt_builder, name="prompt_builder")
rag.add_component("retriever", retriever)
rag.add_component("generator", generator)

rag.connect("text_embedder.embedding", "retriever.query_embedding")
rag.connect("retriever.documents", "prompt_builder.documents")
rag.connect("prompt_builder.prompt", "generator.prompt")

# Ask Question

question = "Summarize what happened in Google v. Sonos"
# question ="What should Sonos have done differently?"

result = rag.run(data={"text_embedder": {"text": question},
                       "retriever": {"top_k": 3},
                       "prompt_builder": {"question": question},
                       "generator": {"generation_kwargs": {"max_new_tokens": 350}}})

print(result['generator']['replies'][0])
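
The commented-out follow-up question can be asked through the same pipeline, for example:

# Follow-up question, reusing the pipeline above
question = "What should Sonos have done differently?"
result = rag.run(data={"text_embedder": {"text": question},
                       "retriever": {"top_k": 3},
                       "prompt_builder": {"question": question},
                       "generator": {"generation_kwargs": {"max_new_tokens": 350}}})
print(result['generator']['replies'][0])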
Categories: RAG

Wikipedia Chatbot using Zephyr 7B

# pip install wikipedia haystack-ai transformers accelerate bitsandbytes sentence_transformers rich

from pprint import pprint
import torch
import rich
import random
import wikipedia
from haystack.dataclasses import Document
from haystack.components.generators import HuggingFaceLocalGenerator
from haystack.components.builders import PromptBuilder
from haystack.components.retrievers import InMemoryEmbeddingRetriever

# Retrieve Wikipedia Pages

favourite_artists="""Britney Spears
Ricky Martin
Madonna
Justin Timberlake
Christina Aguilera
Jennifer Lopez
Shakira
Enrique Iglesias
Mariah Carey
Beyoncé""".split("\n")

raw_docs=[]

for title in favourite_artists:
    page = wikipedia.page(title=title, auto_suggest=False)
    doc = Document(content=page.content, meta={"title": page.title, "url":page.url})
    raw_docs.append(doc)

# Indexing Pipeline

from haystack import Pipeline
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.embedders import SentenceTransformersTextEmbedder, SentenceTransformersDocumentEmbedder
from haystack.components.writers import DocumentWriter
from haystack.components.writers.document_writer import DuplicatePolicy

document_store = InMemoryDocumentStore(embedding_similarity_function="cosine")

indexing = Pipeline()
indexing.add_component("cleaner", DocumentCleaner())
indexing.add_component("splitter", DocumentSplitter(split_by='sentence', split_length=2))
indexing.add_component("doc_embedder", SentenceTransformersDocumentEmbedder(model_name_or_path="thenlper/gte-large", device="cuda:0", meta_fields_to_embed=["title"]))
indexing.add_component("writer", DocumentWriter(document_store=document_store, policy=DuplicatePolicy.OVERWRITE))

indexing.connect("cleaner", "splitter")
indexing.connect("splitter", "doc_embedder")
indexing.connect("doc_embedder", "writer")

indexing.draw("indexing.png")
indexing.run({"cleaner":{"documents":raw_docs}})

# print(len(document_store.filter_documents()))
# pprint(document_store.filter_documents()[0].meta)
# pprint(document_store.filter_documents()[0])
# print(len(document_store.filter_documents()[0].embedding)) # embedding size

generator = HuggingFaceLocalGenerator("HuggingFaceH4/zephyr-7b-beta",
            huggingface_pipeline_kwargs={"device_map":"auto",
                          "model_kwargs":{"load_in_4bit":True,
                          "bnb_4bit_use_double_quant":True,
                          "bnb_4bit_quant_type":"nf4",
                          "bnb_4bit_compute_dtype":torch.bfloat16}},
            generation_kwargs={"max_new_tokens": 350})

generator.warm_up()

prompt_template = """<|system|>Using the information contained in the context, give a comprehensive answer to the question.
If the answer is contained in the context, also report the source URL.
If the answer cannot be deduced from the context, do not give an answer.</s>
<|user|>
Context:
  {% for doc in documents %}
  {{ doc.content }} URL:{{ doc.meta['url'] }}
  {% endfor %};
  Question: {{query}}
  </s>
<|assistant|>
"""
prompt_builder = PromptBuilder(template=prompt_template)

rag = Pipeline()
rag.add_component("text_embedder", SentenceTransformersTextEmbedder(model_name_or_path="thenlper/gte-large", device="cuda:0"))
rag.add_component("retriever", InMemoryEmbeddingRetriever(document_store=document_store, top_k=5))
rag.add_component("prompt_builder", prompt_builder)
rag.add_component("llm", generator)

rag.connect("text_embedder", "retriever")
rag.connect("retriever.documents", "prompt_builder.documents")
rag.connect("prompt_builder.prompt", "llm.prompt")

rag.draw("rag.png")

def get_generative_answer(query):
  results = rag.run({
      "text_embedder": {"text": query},
      "prompt_builder": {"query": query}
    }
  )
  answer = results["llm"]["replies"][0]
  rich.print(answer)

# Ask Question
get_generative_answer("What are some of Jennifer Lopez's most successful singles?")
Categories: RAG

Healthcare Chatbot with Mixtral 8x7b

import os

# Get Hugging Face API Key
huggingface_token = os.environ.get('HUGGINGFACE_API_KEY')

# PubMed Fetcher
from pymed import PubMed
from typing import List
from haystack import component
from haystack import Document

pubmed = PubMed(tool="Mervin.Praison", email="test@test.com")

def documentize(article):
    return Document(content=article.abstract, meta={'title': article.title, 'keywords': article.keywords})

@component
class PubMedFetcher():
    @component.output_types(articles=List[Document])
    def run(self, queries: list[str]):
        cleaned_queries = queries[0].strip().split('\n')
        articles = []
        try:
            for query in cleaned_queries:
                response = pubmed.query(query, max_results=1)
                documents = [documentize(article) for article in response]
                articles.extend(documents)
        except Exception as e:
            print(e)
            print(f"Couldn't fetch articles for queries: {queries}")
        results = {'articles': articles}
        return results

# Pipeline Setup
from haystack.components.generators import HuggingFaceTGIGenerator
from haystack import Pipeline
from haystack.components.builders.prompt_builder import PromptBuilder

keyword_llm = HuggingFaceTGIGenerator("mistralai/Mixtral-8x7B-Instruct-v0.1", token=huggingface_token)
keyword_llm.warm_up()

llm = HuggingFaceTGIGenerator("mistralai/Mixtral-8x7B-Instruct-v0.1", token=huggingface_token)
llm.warm_up()

keyword_prompt_template = """
Your task is to convert the following question into 3 keywords that can be used to find relevant medical research papers on PubMed.
Here is an example:
question: "What are the latest treatments for major depressive disorder?"
keywords:
Antidepressive Agents
Depressive Disorder, Major
Treatment-Resistant depression
---
question: {{ question }}
keywords:
"""

prompt_template = """
Answer the question truthfully based on the given documents.
If the documents don't contain an answer, use your existing knowledge base.

q: {{ question }}
Articles:
{% for article in articles %}
  {{article.content}}
  keywords: {{article.meta['keywords']}}
  title: {{article.meta['title']}}
{% endfor %}
"""

keyword_prompt_builder = PromptBuilder(template=keyword_prompt_template)
prompt_builder = PromptBuilder(template=prompt_template)
fetcher = PubMedFetcher()

pipe = Pipeline()
pipe.add_component("keyword_prompt_builder", keyword_prompt_builder)
pipe.add_component("keyword_llm", keyword_llm)
pipe.add_component("pubmed_fetcher", fetcher)
pipe.add_component("prompt_builder", prompt_builder)
pipe.add_component("llm", llm)

pipe.connect("keyword_prompt_builder.prompt", "keyword_llm.prompt")
pipe.connect("keyword_llm.replies", "pubmed_fetcher.queries")
pipe.connect("pubmed_fetcher.articles", "prompt_builder.articles")
pipe.connect("prompt_builder.prompt", "llm.prompt")

# Function to Ask Questions
def ask(question):
    output = pipe.run(data={"keyword_prompt_builder":{"question":question},
                            "prompt_builder":{"question": question},
                            "llm":{"generation_kwargs": {"max_new_tokens": 500}}})
    print(question)
    print(output['llm']['replies'][0])

# Example Usage
ask("What are the most current treatments for post-acute COVID aka PACS or long COVID?")
Categories: RAG

Orca 2 RAG using TxtAI

pip install "txtai[all]" einops autoawq
# Section 1: Setup LLM 
from txtai.pipeline import LLM, Textractor
from txtai import Embeddings
import os
import nltk
nltk.download('punkt')

llm = LLM("TheBloke/Orca-2-13B-AWQ", trust_remote_code=True)
print("Before RAG:")
print(llm("Tell me about Mervin Praison in one line"))

print("\n------------------\n")

# Section 2: Build RAG Pipeline with Vector Search

def execute(question, context):
  prompt = f"""system
  You are a friendly assistant. You answer questions from users.
  user
  Answer the following question using only the context below. Only include information specifically discussed.

  question: {question}
  context: {context} 
  assistant
  """
  return llm(prompt, maxlength=6096, pad_token_id=32000)

def stream(path):
  for f in sorted(os.listdir(path)):
    if f.endswith(("docx", "xlsx", "pdf", "txt")):
      fpath = os.path.join(path, f)
      for paragraph in textractor(fpath):
        yield paragraph

textractor = Textractor(paragraphs=True)
embeddings = Embeddings(content=True)
docs_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data')
embeddings.index(stream(docs_path))

def context(question):
  return "\n".join(x["text"] for x in embeddings.search(question))

def rag(question):
  return execute(question, context(question))

print("After RAG:")
print(rag("Tell me about Mervin Praison in one line"))

With Citation

# Section 1: Create LLM and Textractor Pipelines
from txtai.pipeline import LLM, Textractor
import os
import nltk
nltk.download('punkt')

llm = LLM("TheBloke/Orca-2-13B-AWQ", trust_remote_code=True)
textractor = Textractor()

docs_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data')

text = textractor(os.path.join(docs_path, "data.txt"))
question = "Tell me about Mervin Praison in one line"
prompt = f"""system
You are a friendly assistant. You answer questions from users.
user
Answer the following question using only the context below. Only include information specifically discussed.

question: {question}
context: {text} 
assistant
"""

print(llm(prompt, maxlength=6096, pad_token_id=32000))

# Section 2: Build RAG Pipeline with Vector Search
import os
from txtai import Embeddings

def execute(question, text):
  prompt = f"""system
  You are a friendly assistant. You answer questions from users.
  user
  Answer the following question using only the context below. Only include information specifically discussed.

  question: {question}
  context: {text} 
  assistant
  """
  return llm(prompt, maxlength=6096, pad_token_id=32000)

def stream(path):
  for f in sorted(os.listdir(path)):
    if f.endswith(("docx", "xlsx", "pdf", "txt")):
      fpath = os.path.join(path, f)
      for paragraph in textractor(fpath):
        yield paragraph

textractor = Textractor(paragraphs=True)
embeddings = Embeddings(content=True)
embeddings.index(stream(docs_path))

def context(question):
  return "\n".join(x["text"] for x in embeddings.search(question))

def rag(question):
  return execute(question, context(question))

print("RAG Pipeline Result:")
print(rag("Tell me about Mervin Praison in one line"))

# Section 3: Implement Citations for LLMs
for x in embeddings.search(rag("Tell me about Mervin Praison in one line")):
    print(x)
    print(x["text"])

# Section 4: Create Extractor Pipeline for Citations
from txtai.pipeline import Extractor

def prompt(question):
  return [{
    "query": question,
    "question": f"""
    Answer the following question using only the context below. Only include information specifically discussed.

    question: {question}
    context:
    """
  }]

llm = LLM("TheBloke/Orca-2-13B-AWQ", template="""system
You are a friendly assistant. You answer questions from users.
user
{text} 
assistant
""")

extractor = Extractor(embeddings, llm, output="reference")
result = extractor(prompt("Tell me about Mervin Praison in one line"), maxlength=4096, pad_token_id=32000)[0]
print("ANSWER:", result["answer"])
print("CITATION:", embeddings.search("select id, text from txtai where id = :id", limit=1, parameters={"id": result["reference"]}))

Contents of ../data/data.txt:

Mervin Praison is an AI, Senior DevOps, and Site Reliability Engineer with a diverse range of technical interests and expertise. He has contributed to various projects and topics such as differences between keyword, sparse, and dense vector indexes, and has developed content on TensorFlow & Keras, focusing on models, optimizers, and activation functions. Praison also demonstrates proficiency in programming and system setup, evident in his guides on AutoGen Agent Training using AgentOptimizer and LocalAI Setup. Additionally, his GitHub activity reflects a consistent contribution pattern in software development and engineering
Categories: RAG

Flask App Serving Mistral + Llama Index + Ollama + ChromaDB

# Import modules
import chromadb
from flask import Flask, request, jsonify
from flask_cors import CORS, cross_origin
from llama_index import VectorStoreIndex, ServiceContext
from llama_index.llms import Ollama
from llama_index.vector_stores.chroma import ChromaVectorStore

# Create Chroma DB client and access the existing vector store
client = chromadb.PersistentClient(path="./chroma_db_data")
chroma_collection = client.get_collection(name="tweets")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Initialize Ollama and ServiceContext
llm = Ollama(model="mistral")
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")

# Create VectorStoreIndex from the existing vector store, retrieving the top 20 most similar nodes
index = VectorStoreIndex.from_vector_store(vector_store=vector_store, service_context=service_context, similarity_top_k=20)

# Set up Flask server
app = Flask(__name__)
cors = CORS(app)
app.config['CORS_HEADERS'] = 'Content-Type'

@app.route('/')
def hello_world():
    return 'Hello, World!'

@app.route('/chat', methods=['GET', 'POST'])
@cross_origin()
def chat():
    query = request.args.get('query') if request.method == 'GET' else request.form.get('query')
    if query is not None:
        query_engine = index.as_query_engine()
        response = query_engine.query(query)
        return jsonify({"response": str(response)})
    else:
        return jsonify({"error": "query field is missing"}), 400
    
if __name__ == '__main__':
    app.run()
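
Once the server is running (Flask listens on http://127.0.0.1:5000 by default), the /chat endpoint can be queried from any HTTP client. A quick sketch with requests, assuming the default port:

# Client-side check of the /chat endpoint (assumes the default Flask port 5000)
import requests

resp = requests.post("http://127.0.0.1:5000/chat",
                     data={"query": "What does the author think about Star Trek? In One line"})
print(resp.json()["response"])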
Categories: RAG

Haystack AI to Create RAG Pipeline

pip install haystack-ai datasets ollama-haystack gradio
ollama run mistral
# Importing required libraries
from datasets import load_dataset
from haystack import Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.components.builders import PromptBuilder
from ollama_haystack import OllamaGenerator
from haystack import Pipeline

# Load dataset and create documents
dataset = load_dataset("bilgeyucel/seven-wonders", split="train")
docs = [Document(content=doc["content"], meta=doc["meta"]) for doc in dataset]

# Initialize document store and write documents
document_store = InMemoryDocumentStore()
document_store.write_documents(docs)

# Initialize retriever
retriever = InMemoryBM25Retriever(document_store)

# Define prompt template
template = """
Given the following information, answer the question.

Context:
{% for document in documents %}
    {{ document.content }}
{% endfor %}

Question: {{question}}
Answer:
"""

# Initialize prompt builder
prompt_builder = PromptBuilder(template=template)

# Initialize Ollama generator
generator = OllamaGenerator(
    model="mistral",
    url = "http://localhost:11434/api/generate",
    generation_kwargs={
        "num_predict": 100,
        "temperature": 0.9,
    }
)

# Create and configure pipeline
basic_rag_pipeline = Pipeline()
basic_rag_pipeline.add_component("retriever", retriever)
basic_rag_pipeline.add_component("prompt_builder", prompt_builder)
basic_rag_pipeline.add_component("llm", generator)
basic_rag_pipeline.connect("retriever", "prompt_builder.documents")
basic_rag_pipeline.connect("prompt_builder", "llm")

# Run the pipeline with a sample question
question = "What does Rhodes Statue look like?"
response = basic_rag_pipeline.run(
    {
        "retriever": {"query": question}, 
        "prompt_builder": {"question": question}
    }
)
print(response["llm"]["replies"][0])

User Interface

# Importing required libraries
from datasets import load_dataset
from haystack import Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.components.builders import PromptBuilder
from ollama_haystack import OllamaGenerator
from haystack import Pipeline
import gradio as gr

# Load dataset and create documents
dataset = load_dataset("bilgeyucel/seven-wonders", split="train")
docs = [Document(content=doc["content"], meta=doc["meta"]) for doc in dataset]

# Initialize document store and write documents
document_store = InMemoryDocumentStore()
document_store.write_documents(docs)

# Initialize retriever
retriever = InMemoryBM25Retriever(document_store)

# Define prompt template
template = """
Given the following information, answer the question.

Context:
{% for document in documents %}
    {{ document.content }}
{% endfor %}

Question: {{question}}
Answer:
"""

# Initialize prompt builder
prompt_builder = PromptBuilder(template=template)

# Initialize Ollama generator
generator = OllamaGenerator(
    model="mistral",
    url = "http://localhost:11434/api/generate",
    generation_kwargs={
        "num_predict": 100,
        "temperature": 0.9,
    }
)

# Create and configure pipeline
basic_rag_pipeline = Pipeline()
basic_rag_pipeline.add_component("retriever", retriever)
basic_rag_pipeline.add_component("prompt_builder", prompt_builder)
basic_rag_pipeline.add_component("llm", generator)
basic_rag_pipeline.connect("retriever", "prompt_builder.documents")
basic_rag_pipeline.connect("prompt_builder", "llm")

# Visualize pipeline (optional)
# basic_rag_pipeline.draw("basic-rag-pipeline.png")

# Define function to run pipeline with Gradio
def ask_question(question):
    response = basic_rag_pipeline.run(
        {
            "retriever": {"query": question}, 
            "prompt_builder": {"question": question}
        }
    )
    return response["llm"]["replies"][0]

# Create Gradio interface
gr_interface = gr.Interface(
    fn=ask_question,
    inputs=gr.components.Textbox(lines=2, placeholder="Enter your question here..."),
    outputs="text"
)

gr_interface.launch()
Categories: RAG

QA Database using Embeddings

from datasets import load_dataset
from txtai import Embeddings

# Load dataset
ds = load_dataset("web_questions", split="train")

# Initialize and index embeddings
embeddings = Embeddings(path="sentence-transformers/nli-mpnet-base-v2", content=True)
embeddings.index([(uid, {"url": row["url"], "text": row["question"], 
                  "answer": ", ".join(row["answers"])}, None) 
                  for uid, row in enumerate(ds)])

# Save the embeddings
embeddings.save("questions.tar.gz")

# Search embeddings
def question(text):
    return embeddings.search(f"select text, answer, score from txtai where similar('{text}') limit 1")

print(question("Tell me an animal found offshore in Florida"))
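
The saved index can be reloaded later in a fresh session with txtai's Embeddings.load. A minimal sketch, assuming questions.tar.gz was created by the save step above:

# Reload the saved embeddings index in a new session
from txtai import Embeddings

embeddings = Embeddings()
embeddings.load("questions.tar.gz")
print(embeddings.search("select text, answer, score from txtai where similar('Tell me an animal found offshore in Florida') limit 1"))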

User Interface

import gradio as gr
from txtai.app import Application

# Initialize the saved embeddings
app = Application("path: questions.tar.gz")

def search_question(query):
    results = app.search(f"select text, answer, score from txtai where similar('{query}') limit 1")[0]
    return results['text'], results['answer'], results['score']

interface = gr.Interface(
    fn=search_question,
    inputs=gr.components.Textbox(label="Enter your query"),
    outputs=[gr.components.Textbox(label="Question"),gr.components.Textbox(label="Answer"),gr.components.Number(label="Similarity")]
)

interface.launch()
Categories: RAG

RAG with ChromaDB + Llama Index + Ollama + CSV

curl https://ollama.ai/install.sh | sh
ollama serve
ollama run mixtral
pip install llama-index torch transformers chromadb

Section 1:

# Import modules
from llama_index.llms import Ollama
from pathlib import Path
import chromadb
from llama_index import VectorStoreIndex, ServiceContext, download_loader
from llama_index.storage.storage_context import StorageContext
from llama_index.vector_stores.chroma import ChromaVectorStore

# Load CSV data
SimpleCSVReader = download_loader("SimpleCSVReader")
loader = SimpleCSVReader(encoding="utf-8")
documents = loader.load_data(file=Path('./fine_food_reviews_1k.csv'))

# Create Chroma DB client and store
client = chromadb.PersistentClient(path="./chroma_db_data")
chroma_collection = client.create_collection(name="reviews")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Initialize Ollama and ServiceContext
llm = Ollama(model="mixtral")
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")

# Create VectorStoreIndex and query engine
index = VectorStoreIndex.from_documents(documents, service_context=service_context, storage_context=storage_context)
query_engine = index.as_query_engine()

# Perform a query and print the response
response = query_engine.query("What are the thoughts on food quality?")
print(response)

Section 2:

# Import modules
import chromadb
from llama_index import VectorStoreIndex, ServiceContext
from llama_index.llms import Ollama
from llama_index.vector_stores.chroma import ChromaVectorStore

# Create Chroma DB client and access the existing vector store
client = chromadb.PersistentClient(path="./chroma_db_data")
chroma_collection = client.get_collection(name="reviews")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Initialize Ollama and ServiceContext
llm = Ollama(model="mixtral")
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")

# Create VectorStoreIndex and query engine, retrieving the top 20 most similar nodes
index = VectorStoreIndex.from_vector_store(vector_store=vector_store, service_context=service_context, similarity_top_k=20)
query_engine = index.as_query_engine()

# Perform a query and print the response
response = query_engine.query("What are the thoughts on food quality?")
print(response)
Categories: RAG

RAG with ChromaDB + Llama Index + Ollama

pip install llama-index torch transformers chromadb
# Import modules
from llama_index.llms import Ollama
from pathlib import Path
import chromadb
from llama_index import VectorStoreIndex, ServiceContext, download_loader
from llama_index.storage.storage_context import StorageContext
from llama_index.vector_stores.chroma import ChromaVectorStore

# Load JSON data
JSONReader = download_loader("JSONReader")
loader = JSONReader()
documents = loader.load_data(Path('tinytweets.json'))

# Create Chroma DB client and store
client = chromadb.PersistentClient(path="./chroma_db_data")
chroma_collection = client.create_collection(name="tweets")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Initialize Ollama and ServiceContext
llm = Ollama(model="mistral")
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")

# Create VectorStoreIndex and query engine
index = VectorStoreIndex.from_documents(documents, service_context=service_context, storage_context=storage_context)
query_engine = index.as_query_engine()

# Perform a query and print the response
response = query_engine.query("What does the author think about Star Trek? In One line")
print(response)
# Import modules
import chromadb
from llama_index import VectorStoreIndex, ServiceContext
from llama_index.llms import Ollama
from llama_index.vector_stores.chroma import ChromaVectorStore

# Create Chroma DB client and access the existing vector store
client = chromadb.PersistentClient(path="./chroma_db_data")
chroma_collection = client.get_collection(name="tweets")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Initialize Ollama and ServiceContext
llm = Ollama(model="mistral")
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")

# Create VectorStoreIndex and query engine, retrieving the top 20 most similar nodes
index = VectorStoreIndex.from_vector_store(vector_store=vector_store, service_context=service_context, similarity_top_k=20)
query_engine = index.as_query_engine()

# Perform a query and print the response
response = query_engine.query("What does the author think about Star Trek? In One line")
print(response)