Categories: Linux

Install Miniconda on Linux

# create a directory to install Miniconda in
mkdir -p ~/miniconda3

# download latest miniconda version
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh

# run the install script
bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3

# delete the install script
rm -rf ~/miniconda3/miniconda.sh

# initialize conda for your bash shell
~/miniconda3/bin/conda init bash

# verify the installation
conda list
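
After running conda init, restart your shell (or run source ~/.bashrc) so the conda command is available. As an optional sanity check, you can create and activate a throwaway environment; the name myenv below is just an example:

# create and activate a test environment (example name: myenv)
conda create -n myenv python=3.11 -y
conda activate myenv
python --version

# remove it again when done
conda deactivate
conda remove -n myenv --all -y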
Categories: AI Agents

CrewAI Groq Create Story Books

conda create -n crewai python=3.11
conda activate crewai
pip install -U 'crewai[tools]' mdpdf
export OPENAI_API_KEY=xxxxxxxxxx
export GROQ_API_KEY=xxxxxxxxxxxx
from crewai import Agent, Task, Crew, Process
from crewai_tools import tool
from langchain_openai import ChatOpenAI
from crewai_tools.tools import FileReadTool
import os, requests, re, mdpdf, subprocess
from openai import OpenAI

llm = ChatOpenAI(
    openai_api_base="https://api.groq.com/openai/v1", # https://api.openai.com/v1 or https://api.groq.com/openai/v1 
    openai_api_key=os.getenv("GROQ_API_KEY"), # os.getenv("OPENAI_API_KEY") or os.getenv("GROQ_API_KEY")
    model_name="mixtral-8x7b-32768" #  gpt-4-turbo-preview or mixtral-8x7b-32768 
)

file_read_tool = FileReadTool(
	file_path='template.md',
	description='A tool to read the Story Template file and understand the expected output format.'
)

@tool
def generateimage(chapter_content_and_character_details: str) -> str:
    """
    Generates an image for a given chapter number, chapter content, location details and character details
    using the OpenAI image generation API, saves it in the current folder,
    and returns the image path.
    """
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    response = client.images.generate(
        model="dall-e-3",
        prompt=f"Image is about: {chapter_content_and_character_details}. Style: Illustration. Create an illustration incorporating a vivid palette with an emphasis on shades of azure and emerald, augmented by splashes of gold for contrast and visual interest. The style should evoke the intricate detail and whimsy of early 20th-century storybook illustrations, blending realism with fantastical elements to create a sense of wonder and enchantment. The composition should be rich in texture, with a soft, luminous lighting that enhances the magical atmosphere. Attention to the interplay of light and shadow will add depth and dimensionality, inviting the viewer to delve into the scene. DON'T include ANY text in this image. DON'T include colour palettes in this image.",
        size="1024x1024",
        quality="standard",
        n=1,
    )
    image_url = response.data[0].url
    words = chapter_content_and_character_details.split()[:5] 
    safe_words = [re.sub(r'[^a-zA-Z0-9_]', '', word) for word in words]  
    filename = "_".join(safe_words).lower() + ".png"
    filepath = os.path.join(os.getcwd(), filename)

    # Download the image from the URL
    image_response = requests.get(image_url)
    if image_response.status_code == 200:
        with open(filepath, 'wb') as file:
            file.write(image_response.content)
    else:
        print("Failed to download the image.")
        return ""

    return filepath

@tool
def convermarkdowntopdf(markdownfile_name: str) -> str:
    """
    Converts a Markdown file to a PDF document using the mdpdf command line application.

    Args:
        markdownfile_name (str): Path to the input Markdown file.

    Returns:
        str: Path to the generated PDF file.
    """
    output_file = os.path.splitext(markdownfile_name)[0] + '.pdf'
    
    # Command to convert markdown to PDF using mdpdf
    cmd = ['mdpdf', '--output', output_file, markdownfile_name]
    
    # Execute the command
    subprocess.run(cmd, check=True)
    
    return output_file

story_outliner = Agent(
  role='Story Outliner',
  goal='Develop an outline for a children\'s storybook about Animals, including chapter titles and characters for 5 chapters.',
  backstory="An imaginative creator who lays the foundation of captivating stories for children.",
  verbose=True,
  llm=llm,
  allow_delegation=False
)

story_writer = Agent(
  role='Story Writer',
  goal='Write the full content of the story for all 5 chapters, each chapter 100 words, weaving together the narratives and characters outlined.',
  backstory="A talented storyteller who brings to life the world and characters outlined, crafting engaging and imaginative tales for children.",
  verbose=True,
  llm=llm,
  allow_delegation=False
)

image_generator = Agent(
  role='Image Generator',
  goal='Generate one image per chapter based on the content provided by the story outliner. Start with the chapter number, chapter content, character details, detailed location information and detailed items in the location where the activity happens. Generate 5 images in total, one by one. The final output should contain all 5 images in JSON format.',
  backstory="A creative AI specialized in visual storytelling, bringing each chapter to life through imaginative imagery.",
  verbose=True,
  llm=llm,
  tools=[generateimage],
  allow_delegation=False
)

content_formatter = Agent(
    role='Content Formatter',
    goal='Format the written story content in markdown, including images at the beginning of each chapter.',
    backstory='A meticulous formatter who enhances the readability and presentation of the storybook.',
    verbose=True,
    llm=llm,
    tools=[file_read_tool],
    allow_delegation=False
)

markdown_to_pdf_creator = Agent(
    role='PDF Converter',
    goal='Convert the Markdown file to a PDF document. story.md is the markdown file name.',
    backstory='An efficient converter that transforms Markdown files into professionally formatted PDF documents.',
    verbose=True,
    llm=llm,
    tools=[convermarkdowntopdf],
    allow_delegation=False
)


# Create tasks for the agents
task_outline = Task(
    description='Create an outline for the children\'s storybook about Animals, detailing chapter titles and character descriptions for 5 chapters.',
    agent=story_outliner,
    expected_output='A structured outline document containing 5 chapter titles, with detailed character descriptions and the main plot points for each chapter.'
)

task_write = Task(
    description='Using the outline provided, write the full story content for all chapters, ensuring a cohesive and engaging narrative for children. Each chapter should be 100 words. Include the title of the story at the top.',
    agent=story_writer,
    expected_output='A complete manuscript of the children\'s storybook about Animals with 5 chapters. Each chapter should contain approximately 100 words, following the provided outline and integrating the characters and plot points into a cohesive narrative.'
)

task_image_generate = Task(
    description='Generate 5 images that capture the essence of the children\'s storybook about Animals, aligning with the themes, characters, and narrative outlined for the chapters. Generate them one by one.',
    agent=image_generator,
    expected_output='Five digital image files that visually represent the chapters of the children\'s storybook, incorporating elements from the characters and plot as described in the outline. The images should be suitable for inclusion in the storybook as illustrations.',
)

task_format_content = Task(
    description='Format the story content in markdown, including an image at the beginning of each chapter.',
    agent=content_formatter,
    expected_output='The entire storybook content formatted in markdown, with each chapter title followed by the corresponding image and the chapter content.',
    context=[task_write, task_image_generate],
    output_file="story.md"
)

task_markdown_to_pdf = Task(
    description='Convert a Markdown file to a PDF document, ensuring the preservation of formatting, structure, and embedded images using the mdpdf library.',
    agent=markdown_to_pdf_creator,
    expected_output='A PDF file generated from the Markdown input, accurately reflecting the content with proper formatting. The PDF should be ready for sharing or printing.'
)

crew = Crew(
  agents=[story_outliner, story_writer, image_generator, content_formatter, markdown_to_pdf_creator],
  tasks=[task_outline, task_write, task_image_generate, task_format_content, task_markdown_to_pdf],
  verbose=True,
  process=Process.sequential
)

result = crew.kickoff()

print(result)

Ollama

llm = ChatOpenAI(
    openai_api_base="http://localhost:11434/v1", # https://api.openai.com/v1 or https://api.groq.com/openai/v1 
    openai_api_key="Null", # os.getenv("OPENAI_API_KEY") or os.getenv("GROQ_API_KEY")
    model_name="mistral" #  gpt-4-turbo-preview or mixtral-8x7b-32768
)
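
If you use the Ollama variant above, the model must exist locally before the crew starts. A minimal sketch, assuming Ollama is installed and serving on its default port 11434:

# start the Ollama server if it is not already running as a background service
ollama serve &

# download the model referenced as model_name="mistral" above
ollama pull mistral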

template.md

# Title of the Book

## Chapter 1: The Magic Forest
![Chapter 1 Image](LOCAL_PATH_TO_IMAGE)
Contents of chapter 1...

## Chapter 2: The Hidden Cave
![Chapter 2 Image](LOCAL_PATH_TO_IMAGE)
Contents of chapter 2...

## Chapter 3: The Old Wise Tree
![Chapter 3 Image](LOCAL_PATH_TO_IMAGE)
Contents of chapter 3...

## Chapter 4: The River of Dreams
![Chapter 4 Image](LOCAL_PATH_TO_IMAGE)
Contents of chapter 4...

## Chapter 5: The Return Journey
![Chapter 5 Image](LOCAL_PATH_TO_IMAGE)
Contents of chapter 5...
Categories: AutoGen

AutoGen AI Agents To Write Books

import autogen, os
from autogen.agentchat.contrib.multimodal_conversable_agent import MultimodalConversableAgent

# Placeholder configuration for the Large Language Model (LLM)
llm_config = {
        "config_list": [
            {
                'model': 'gpt-4-turbo-preview',
                'api_key': os.getenv("OPENAI_API_KEY"),
            }
        ]
    }  # Replace with actual LLM configuration details

# Define the DALLE configuration
dalle_llm_config = {
    "config_list": [
        {
            'model': 'dalle',
            'api_key': os.getenv("OPENAI_API_KEY"),
        }
    ]
}

# Agent to outline the book chapters
chapter_outliner = autogen.AssistantAgent(
    name="chapter_outliner",
    system_message="Outline the chapters for the book, focusing on a clear structure and logical flow.",
    llm_config=llm_config
)

# Agent to write the main content of the book
content_writer = autogen.AssistantAgent(
    name="content_writer",
    system_message="Write the main content for the chapters, ensuring coherence and engagement.",
    llm_config=llm_config
)

# Agent to create engaging characters for the book
character_developer = autogen.AssistantAgent(
    name="character_developer",
    system_message="Develop detailed and compelling characters that fit the book's narrative.",
    llm_config=llm_config
)

# Agent to edit the book for grammar and style
editor = autogen.AssistantAgent(
    name="editor",
    system_message="Review and edit the book's content for grammar, style, and overall readability.",
    llm_config=llm_config
)

# DALL-E Agent for cover design
cover_designer = MultimodalConversableAgent(
    name="cover_designer",
    system_message="Design a book cover using DALL-E based on the book's theme and content.",
    llm_config=dalle_llm_config
)

# User proxy agent to simulate the author's requests or initiate the book creation process
author_proxy = autogen.UserProxyAgent(
    name="author_proxy",
    is_termination_msg=lambda x: (x.get("content") or "").rstrip().endswith("TERMINATE"),
    human_input_mode="NEVER",
    max_consecutive_auto_reply=0,
    code_execution_config=False
)

# Group chat that combines all book creation agents including the cover designer with DALL-E
book_creation_chat = autogen.GroupChat(
    agents=[chapter_outliner, content_writer, character_developer, editor, cover_designer, author_proxy],
    messages=[],
    max_round=5
)

# Manager to coordinate the book creation process including cover design
book_chat_manager = autogen.GroupChatManager(groupchat=book_creation_chat, llm_config=llm_config)

# Initiating the chat with detailed instructions for book creation including cover design
author_proxy.initiate_chat(
    book_chat_manager,
    message=(
        "The book I want to write is a Fantasy novel. The tone should be suspenseful and immersive, "
        "targeting a length of 300 pages. The narrative is character-driven with intricate world-building. "
        "The chapter_outliner will structure the chapters, the content_writer will fill in the chapter details, "
        "the character_developer will provide character backstories, and the editor will polish the text. "
        "Lastly, the cover_designer, using DALL-E, will create a cover that resonates with the themes of "
        "magic and adventure. The cover should be visually striking and elicit a sense of wonder."
    ),
    
)
❯ python dalle-main.py
author_proxy (to chat_manager):

The book I want to write is a Fantasy novel. The tone should be suspenseful and immersive, targeting a length of 300 pages. The narrative is character-driven with intricate world-building. The chapter_outliner will structure the chapters, the content_writer will fill in the chapter details, the character_developer will provide character backstories, and the editor will polish the text. Lastly, the cover_designer, using DALL-E, will create a cover that resonates with the themes of magic and adventure. The cover should be visually striking and elicit a sense of wonder.

--------------------------------------------------------------------------------
chapter_outliner (to chat_manager):

### Chapter Outline for Fantasy Novel

#### Prologue: The Prophecy Unveiled
- A mysterious oracle reveals a prophecy that foreshadows the intertwining fates of the main characters in the world of Eldoria, setting the stage for their epic journey.

#### Chapter 1: In the Shadows of Eldoria
- Introduction to the world of Eldoria, its magic, politics, and the looming threat.
- Introduce the protagonist, Aelar, a young mage unaware of his destiny.

#### Chapter 2: Whispers of Rebellion
- Aelar discovers a rebel group plotting against the corrupt council ruling Eldoria.
- Introduces Lyria, a skilled archer with a secret, who saves Aelar from a dangerous situation.

#### Chapter 3: The Forbidden Magic
- Aelar and Lyria uncover ancient texts revealing forbidden magic that could change the course of the war.
- They decide to seek the magic, despite the dangers.

#### Chapter 4: Journey to the Elder Forest
- Their journey begins. Detailed description of their travels, focusing on the challenges they face and the landscapes of Eldoria.
- Introduce a mysterious character following them.

#### Chapter 5: Betrayal at Dusk
- The duo is betrayed by someone they thought was an ally, leading to a dramatic escape.
- They learn to trust only each other.

#### Chapter 6: The Guardian of the Forest
- Aelar and Lyria find the Elder Forest and meet its guardian, who tests their resolve.
- They pass the test and are granted an audience with the Forest Spirit.

#### Chapter 7: The Pact with the Forest Spirit
- The Forest Spirit reveals more about Aelar's destiny and the true nature of the forbidden magic.
- They form a pact that grants them new allies but also new enemies.

#### Chapter 8: The Siege of Shadowvale
- Returning to civilization, they find Shadowvale under siege by dark forces.
- A full-scale battle chapter where Aelar and Lyria put their new powers to the test.

#### Chapter 9: The Cost of Power
- The aftermath of the battle. Aelar and Lyria grapple with the cost of their newfound powers.
- Secrets about Aelar's ancestry come to light.

#### Chapter 10: The Labyrinth of Echoes
- They journey to a legendary labyrinth to uncover a weapon capable of turning the tide of the war.
- This chapter focuses on personal growth and revelation.

#### Chapter 11: The Echoes of Truth
- Inside the labyrinth, Aelar and Lyria confront their deepest fears and truths.
- They find the weapon, but its use comes with a heavy decision.

#### Chapter 12: The Final Stand
- Preparations for the final battle against the dark forces threatening Eldoria.
- Allies and enemies reveal their true colors.

#### Chapter 13: Clash of Fates
- The epic finale. A suspenseful, action-packed chapter where all the character arcs and plot points converge in a dramatic showdown.

#### Chapter 14: A New Dawn
- The aftermath of the battle. Reflections on loss, love, and the future.
- Hints at new adventures, wrapping up the story but leaving room for a sequel.

#### Epilogue: Beyond the Veil of Stars
- A glimpse into the future of Eldoria and our heroes, teasing the vastness of this world and potential for more stories.

- This structure provides a balanced mix of character development, world-building, and plot progression, ensuring a suspenseful and immersive reading experience.

--------------------------------------------------------------------------------
content_writer (to chat_manager):

### Prologue: The Prophecy Unveiled

In the heart of the mystical world of Eldoria, under a sky veiled by the shimmer of a thousand stars, the ancient oracle, cloaked in shadows, stood before the gathering of the realms. With a voice that weaved through the night, carrying the weight of ages, the oracle unveiled a prophecy long whispered in the winds but never spoken aloud—until now.

"In the time of great despair, when shadows lengthen and the land weeps, two souls shall emerge from the mists of destiny. Together, they shall hold the key to salvation or the harbinger of our undoing. The weaver of magic and the mistress of the bow, their fates intertwined, shall embark on a quest that shall lead them into the heart of darkness or into the dawn of a new era."

As the oracle's words faded into the night, leaving a veil of silence, the seeds of destiny were sown, igniting a spark that would grow into a flame of adventure, danger, and the struggle for the soul of Eldoria.

### Chapter 1: In the Shadows of Eldoria

The world of Eldoria was a land of contrasts, where the beauty of its lush landscapes was marred by the brewing storms of political intrigue and the looming threat of war. Magic coursed through the veins of the land, a gift that was both its salvation and curse.

In the heart of Eldoria, within the ancient city of Shadowvale, lived Aelar, a young mage of no particular renown. Orphaned and raised by the conclave of mages, Aelar's life was one of routine—study, practice, and the occasional, unremarkable chores. Yet, unbeknownst to him, the gears of fate had begun to turn, drawing him ever closer to a destiny foretold.

### Chapter 2: Whispers of Rebellion

It was during an errand for the conclave that Aelar stumbled upon the whispers of rebellion. In the shadowed alleys of Shadowvale, he encountered Lyria, her bow drawn, standing over the fallen form of a would-be assailant. With a nod of gratitude and a flurry of questions, Aelar found himself pulled into Lyria's world—a world where the whispers of rebellion against the corrupt council of Eldoria grew louder with each passing day.

Lyria, with her enigmatic smile and eyes that seemed to hold unspoken secrets, intrigued Aelar in a way he couldn't quite understand. Together, they began to unravel the threads of a conspiracy that threatened the very fabric of Eldoria.

### Chapter 3: The Forbidden Magic

The discovery of ancient texts within the hidden vaults of the conclave marked the beginning of a new chapter in Aelar and Lyria's journey. The texts spoke of forbidden magic, a power lost to the ages, capable of altering the balance of power in the looming conflict. Despite the danger, the promise of such magic ignited a spark of hope, a chance to turn the tides against the darkness encroaching upon their world.

Determined, yet wary of the path they were about to tread, Aelar and Lyria set their sights on the untamed wilds of Eldoria, where the secrets of the forbidden magic lay hidden, watched over by powers ancient and unfathomable.

### Chapter 4: Journey to the Elder Forest

Their journey was one of peril and discovery, through valleys shrouded in mist and over mountains that touched the sky. Eldoria revealed itself to Aelar and Lyria in all its splendor and danger, a land alive with magic, where every rock and tree held stories of old.

Yet, they were not alone. A shadow followed them, elusive and persistent, a silent watcher whose intentions remained hidden. As the landscapes of Eldoria unfolded before them, so too did the realization that their quest was about more than the search for forbidden magic—it was a journey into the very heart of who they were and who they were meant to become.

### Chapter 5: Betrayal at Dusk

The betrayal came at dusk, when the sky was painted in hues of blood and fire. An ally, a friend who had walked beside them through the trials of their journey, revealed their true colors, leaving Aelar and Lyria to face the consequences of their trust. Escape was their only option, a bitter lesson learned under the cloak of night.

The betrayal forged a new bond between them, a resolve hardened by the trials they had faced. Trust, they realized, was not a gift to be given lightly in the world of Eldoria. From that moment, they knew they could rely only on each other as they continued their quest for the forbidden magic.

### Chapter 6: The Guardian of the Forest

The Elder Forest stood ancient and wise, its secrets hidden beneath a canopy that stretched towards the heavens. It was here that Aelar and Lyria faced the guardian, a creature of the forest both fearsome and majestic. Their resolve was tested, their strength and will pushed to the limits.

In the end, their determination proved true, earning them the guardian's respect and, ultimately, an audience with the elusive Forest Spirit. The guardian, with a nod of acknowledgment, stepped aside, allowing them passage deeper into the heart of the Elder Forest, where destiny awaited.

### Chapter 7: The Pact with the Forest Spirit

Within the heart of the Elder Forest, beneath the ancient boughs where light seldom touched, the Forest Spirit awaited. Ethereal and timeless, it spoke of destiny, of Aelar's true heritage, and the nature of the forbidden magic they sought. The pact they formed with the Forest Spirit granted them new allies in the creatures of the wood and the blessings of the forest, but it also marked them. New enemies, drawn by the power they now held, emerged from the shadows, seeking to claim the forbidden magic for themselves.

United by their pact, Aelar and Lyria emerged from the Elder Forest changed, bearing the weight of their destiny with a newfound resolve. Ahead lay the siege of Shadowvale, a test of their powers and their conviction in the face of darkness.

### Chapter 8: The Siege of Shadowvale

The Siege of Shadowvale was a maelstrom of chaos and conflict, a city besieged by the dark forces that sought to claim Eldoria. Aelar and Lyria, standing side by side, unleashed the power of their new allies and the forbidden magic against the tide of darkness. The battle raged, a testament to their courage and the strength of their bond.

In the heart of the conflict, amidst the clash of steel and the roar of magic, they fought, not just for Shadowvale, but for the future of Eldoria. The siege was a crucible, forging them in the heat of battle, their powers tested as never before.

### Chapter 9: The Cost of Power

In the aftermath of the battle, the cost of their victory became apparent. The forbidden magic, while powerful, demanded a price, leaving traces of darkness that lingered in their souls. Secrets unfolded, revealing the depths of Aelar's ancestry, a lineage entwined with the fate of Eldoria itself.

Faced with the consequences of their actions, Aelar and Lyria grappled with the reality of their powers and the shadows that clung to their hearts. The path ahead was uncertain, yet the bond they shared, tempered in the fires of conflict, proved to be their guiding light.

### Chapter 10: The Labyrinth of Echoes

The journey to the legendary labyrinth was a journey inward, a quest for a weapon rumored to have the power to end the war. Yet, the labyrinth was more than a maze of stone and shadow; it was a reflection of their souls, a place where their deepest fears and truths lay bare.

Navigating the twisting paths, Aelar and Lyria confronted the echoes of their past, the doubts and fears that sought to ensnare them. Each step, each revelation, brought them closer to the heart of the labyrinth and the weapon it guarded, but also to the understanding of their own strength and the nature of their bond.

### Chapter 11: The Echoes of Truth

In the heart of the labyrinth, beneath a sky lit by stars unseen, Aelar and Lyria faced the echoes of truth. The weapon they sought lay within reach, its power undeniable but its use fraught with consequence. The labyrinth, with its trials and revelations, had shown them the complexity of their quest and the shades of grey that tinged their world.

The decision to wield the weapon, to embrace its power and the cost that came with it, was a burden they chose to bear together. In the echoes of truth, they found not just a weapon but a deeper understanding of the path they had chosen and the destiny that awaited them beyond the walls of the labyrinth.

### Chapter 12: The Final Stand

The preparations for the final stand against the dark forces threatening Eldoria were a time of unity and determination. Allies rallied, drawn by the courage of Aelar and Lyria, while enemies revealed their true intentions, setting the stage for the final confrontation.

The air was charged with anticipation, the weight of destiny palpable as the moment of the clash drew near. Strategies were laid, alliances forged in the fire of shared purpose, as every heart beat with the resolve to protect Eldoria or to witness its fall.

### Chapter 13: Clash of Fates

The clash of fates was a tempest, a clash of power and wills where the fate of Eldoria hung in the balance. Aelar and Lyria, standing at the heart of the storm, fought with the ferocity of those who had nothing left to lose and everything to gain.

Around them, the battle raged, a cacophony of magic and metal, as friends and foes alike fought for the future of their world. The outcome, uncertain and fraught with peril, was a testament to their journey, their struggles, and the strength they had found in each other.

### Chapter 14: A New Dawn

In the aftermath of the battle, as the dust settled and the sounds of conflict faded, a new dawn rose over Eldoria. The cost of their victory was etched in the scars of the land and the hearts of its people, a reminder of the price of freedom.

Aelar and Lyria, changed by their journey and the battles they had fought, stood together, looking out over a world that was both familiar and forever altered. The path ahead was unwritten, a canvas upon which they would chart the course of their destinies.

Reflections on the past mingled with hopes for the future as they considered the new adventures that awaited, the stories yet to be told. The world of Eldoria, with its beauty and its scars, lay open before them, a testament to their journey and the promise of a new day.

### Epilogue: Beyond the Veil of Stars

Beyond the veil of stars, in the hidden corners of Eldoria and the realms beyond, the echoes of the prophecy lingered. The world had been changed, shaped by the courage of those who dared to dream, to fight, to stand against the darkness.

In the quiet moments between the storms, the story of Aelar and Lyria whispered through the ages, a reminder of the light that can be found even in the darkest of times. And as the stars shone down upon Eldoria, the promise of new tales and adventures shimmered on the horizon, a call to those who would journey beyond the veil of stars.

--------------------------------------------------------------------------------
character_developer (to chat_manager):

### Character Developer

#### Aelar: The Weaver of Magic

**Backstory:** Aelar grew up an orphan within the walls of the Arcanum, the conclave of mages in the city of Shadowvale. His early life was marked by the mundane tasks and rigorous study expected of a young apprentice. However, beneath his unremarkable exterior lies a lineage of powerful magic tied to the very essence of Eldoria—a heritage unknown to him. The loss of his parents in a mysterious calamity that barely left traces of their existence has always been a void in his life, fueling his thirst for knowledge and unconsciously guiding him towards his destiny.

**Personality:** Aelar is introspective, with a natural curiosity that often leads him into trouble. Despite his upbringing among mages, he harbors a distrust of authority, stemming from the secretive nature of those who raised him. He is resourceful, quick to learn, and adapt, with an innate talent for magic that sets him apart. Aelar’s journey reveals a strength and depth of character driven by his desire for answers and a better understanding of his true place in the world.

**Appearance:** With unkempt auburn hair and keen emerald eyes, Aelar's appearance is both unassuming and captivating. His attire typically consists of the practical robes of an apprentice, though as his journey unfolds, his garb becomes an amalgamation of various cultures and influences he encounters.

#### Lyria: The Mistress of the Bow

**Backstory:** Raised in the borderlands of Eldoria, Lyria's skills with the bow were honed from a young age, a necessity of survival in the wilds. Her family was part of a small community that fell victim to the encroaching darkness spreading across the land, leaving her the sole survivor by a twist of fate. This tragedy instilled in her a silent resolve and a personal vendetta against the forces threatening Eldoria. Her path converged with Aelar’s in Shadowvale, where her quest for vengeance found a broader purpose.

**Personality:** Lyria is fierce and determined, with a stoic facade that rarely slips. Beneath her strong exterior lies a deep well of empathy, driving her to protect those who cannot defend themselves. Her experiences have made her wary of trusting others, yet her bond with Aelar grows into a steadfast alliance built on mutual respect and shared goals. Lyria’s tactical mind and survival skills make her an invaluable companion on their journey.

**Appearance:** Lyria moves with a grace that belies her strength, her lean physique a testament to a life of hardship and battle. Her dark hair is often pulled back, highlighting sharp blue eyes that miss nothing. Her attire is practical and suited to her nomadic lifestyle, with leather armor and a quiver always within reach.

#### The Oracle

**Backstory:** The Oracle of Eldoria is shrouded in mystery, an ageless entity whose origins are lost to time. Serving as the bridge between the realms and the mortal world, the Oracle’s visions have guided the fate of kingdoms and heroes alike. Yet, the Oracle’s allegiance is to the balance of the world, making their motivations inscrutable and their prophecies double-edged.

**Personality:** The Oracle speaks in riddles, their demeanor both ethereal and imposing. They possess an unfathomable depth of knowledge and wisdom, showing compassion as often as indifference. The Oracle’s interactions with Aelar and Lyria are cryptic, pushing them toward their destinies while allowing them the freedom to forge their own paths.

**Appearance:** The Oracle’s appearance is as fluid as their nature, often perceived differently by those who seek their counsel. To most, they appear as a figure cloaked in robes of shifting colors, their face obscured by the shadows of their hood. Their presence is accompanied by a sense of timeless power and an air of the unknowable.

These characters, with their rich backstories and diverse personalities, form the core of a narrative built on discovery, growth, and the struggle against darkness, driving the suspenseful and immersive world of Eldoria forward.

--------------------------------------------------------------------------------
editor (to chat_manager):

The outlined book presents a fascinating journey through the mystical world of Eldoria, following the adventures of Aelar, the weaver of magic, and Lyria, the mistress of the bow. Their quest against the dark forces threatening their world is fraught with challenges, revelations, and growth, marking each chapter with suspense and immersive storytelling. The characters, developed with depth and complexity, provide the narrative with heart and drive. The intricate world-building, alongside the character-driven plot, promises an engaging and memorable fantasy narrative. To enhance its readability and impact, the content could benefit from occasional tightening of prose and deeper exploration of emotional landscapes in key moments. Overall, this fantasy novel is poised to captivate readers with its rich storytelling and imaginative journey.

--------------------------------------------------------------------------------
Categories: AutoGen

AutoGenBench AI Agents Evaluation

| Rank | Model | Successes (Total) |
|------|-------|-------------------|
| 1 | GPT-4 Turbo Preview | 45 |
| 2 | Mixtral | 25 |
| 3 | Llama 2 70B | 18 |
| 4 | Hermes 2 Pro | 13 |
| 5 | Mistral | 13 |
| 6 | Code Llama | 12 |
| 7 | Gemma 7b | 4 |

pip install autogenbench
export OPENAI_API_KEY=xxxxxxxxxxxx
autogenbench clone HumanEval
cd HumanEval
touch OAI_CONFIG_LIST 

Open the OAI_CONFIG_LIST file; it should contain the data below:

[
    {
        "model": "gpt-4-turbo-preview"
    }
]
export OAI_CONFIG_LIST=$(cat ./OAI_CONFIG_LIST)
autogenbench run --subsample 0.1 --repeat 3 Tasks/human_eval_TwoAgents.jsonl
autogenbench tabulate Results/human_eval_TwoAgents

GPT 4 Turbo Preview

                 Trial 0    Trial 1    Trial 2
Task Id          Success    Success    Success
-------------  ---------  ---------  ---------
HumanEval_107       False      True       True
HumanEval_22        True       True       True
HumanEval_43        True       True       True
HumanEval_88        True       True       True
HumanEval_14        True       True       True
HumanEval_157       True       True       True
HumanEval_141       True       True       True
HumanEval_57        True       True       True
HumanEval_154       True       True       True
HumanEval_153       True       True       True
HumanEval_93        False      True      False
HumanEval_137       True       True       True
HumanEval_143       True       True       True
HumanEval_13        True       True       True
HumanEval_49        True       True       True
HumanEval_95        True       True       True
-------------  ---------  ---------  ---------
Successes             14         16         15
Failures               2          0          1
Missing                0          0          0
Total                 16         16         16

CAUTION: 'autogenbench tabulate' is in early preview.
Please do not cite these values in academic work without first inspecting and verifying the results in the logs yourself.
  • autogenbench clone HumanEval downloads and expands the HumanEval benchmark scenario.
  • cd HumanEval navigates into the benchmark directory.
  • autogenbench run --subsample 0.1 --repeat 3 Tasks/human_eval_TwoAgents.jsonl runs a 10% subsample of the tasks defined in Tasks/human_eval_TwoAgents.jsonl. Each task is run 3 times.
  • autogenbench tabulate Results/human_eval_TwoAgents tabulates the results of the run.

Hermes 2 Pro

[
    {
        "model": "adrienbrault/nous-hermes2pro:Q5_K_S",
        "base_url": "http://X.X.X.X:11434/v1",
        "api_type": "openai",
        "api_key": "NULL"
    }
]
(autogen) ➜  HumanEval autogenbench tabulate Results/human_eval_TwoAgents
Using tabulation method defined in '/home/Ubuntu/autogen/HumanEval/Scripts/custom_tabulate.py'

                 Trial 0    Trial 1    Trial 2
Task Id          Success    Success    Success
-------------  ---------  ---------  ---------
HumanEval_107       True      False       True
HumanEval_22        True      False      False
HumanEval_43       False       True       True
HumanEval_88       False      False      False
HumanEval_14       False      False      False
HumanEval_157      False      False      False
HumanEval_141      False       True      False
HumanEval_57        True      False      False
HumanEval_154      False      False      False
HumanEval_153      False      False       True
HumanEval_93       False      False      False
HumanEval_137      False      False      False
HumanEval_143      False      False       True
HumanEval_13       False      False       True
HumanEval_49        True       True      False
HumanEval_95       False       True      False
-------------  ---------  ---------  ---------
Successes              4          4          5
Failures              12         12         11
Missing                0          0          0
Total                 16         16         16

CAUTION: 'autogenbench tabulate' is in early preview and is not thoroughly tested.
Please do not cite values from these calculations in academic work without first inspecting and verifying the results in the run logs yourself.

Mistral

[
    {
        "model": "mistral",
        "base_url": "http://X.X.X.X:11434/v1",
        "api_type": "openai",
        "api_key": "NULL"
    }
]
(autogen) ➜  HumanEval autogenbench tabulate Results/human_eval_TwoAgents
Using tabulation method defined in '/home/Ubuntu/autogen/HumanEval/Scripts/custom_tabulate.py'

                 Trial 0    Trial 1    Trial 2
Task Id          Success    Success    Success
-------------  ---------  ---------  ---------
HumanEval_107       True      False       True
HumanEval_22       False       True       True
HumanEval_43        True      False       True
HumanEval_88       False      False      False
HumanEval_14       False      False      False
HumanEval_157      False      False      False
HumanEval_141      False      False      False
HumanEval_57        True       True      False
HumanEval_154      False      False       True
HumanEval_153      False      False      False
HumanEval_93       False      False      False
HumanEval_137      False      False      False
HumanEval_143      False      False       True
HumanEval_13       False       True       True
HumanEval_49       False      False      False
HumanEval_95       False       True      False
-------------  ---------  ---------  ---------
Successes              3          4          6
Failures              13         12         10
Missing                0          0          0
Total                 16         16         16

CAUTION: 'autogenbench tabulate' is in early preview and is not thoroughly tested.
Please do not cite values from these calculations in academic work without first inspecting and verifying the results in the run logs yourself.

Code Llama

(autogen) ➜  HumanEval autogenbench tabulate Results/human_eval_TwoAgents
Using tabulation method defined in '/home/Ubuntu/autogen/HumanEval/Scripts/custom_tabulate.py'

                 Trial 0    Trial 1    Trial 2
Task Id          Success    Success    Success
-------------  ---------  ---------  ---------
HumanEval_107       True       True      False
HumanEval_22       False       True       True
HumanEval_43        True       True       True
HumanEval_88       False      False      False
HumanEval_14        True      False      False
HumanEval_157      False      False      False
HumanEval_141      False      False      False
HumanEval_57       False      False      False
HumanEval_154      False       True      False
HumanEval_153      False      False      False
HumanEval_93       False      False      False
HumanEval_137      False      False      False
HumanEval_143      False      False       True
HumanEval_13       False       True      False
HumanEval_49       False      False       True
HumanEval_95       False      False      False
-------------  ---------  ---------  ---------
Successes              3          5          4
Failures              13         11         12
Missing                0          0          0
Total                 16         16         16

CAUTION: 'autogenbench tabulate' is in early preview and is not thoroughly tested.
Please do not cite values from these calculations in academic work without first inspecting and verifying the results in the run logs yourself.
[
    {
        "model": "codellama",
        "base_url": "http://X.X.X.X:11434/v1",
        "api_type": "openai",
        "api_key": "NULL"
    }
]

Mixtral

(autogen) ➜  HumanEval autogenbench tabulate Results/human_eval_TwoAgents
Using tabulation method defined in '/home/Ubuntu/autogen/HumanEval/Scripts/custom_tabulate.py'

                 Trial 0    Trial 1    Trial 2
Task Id          Success    Success    Success
-------------  ---------  ---------  ---------
HumanEval_107       True       True       True
HumanEval_22       False      False      False
HumanEval_43        True       True       True
HumanEval_88       False      False      False
HumanEval_14        True       True       True
HumanEval_157      False      False      False
HumanEval_141      False      False      False
HumanEval_57        True       True       True
HumanEval_154       True       True       True
HumanEval_153       True       True       True
HumanEval_93       False      False      False
HumanEval_137      False      False      False
HumanEval_143       True       True       True
HumanEval_13        True       True       True
HumanEval_49       False      False       True
HumanEval_95       False      False      False
-------------  ---------  ---------  ---------
Successes              8          8          9
Failures               8          8          7
Missing                0          0          0
Total                 16         16         16

CAUTION: 'autogenbench tabulate' is in early preview and is not thoroughly tested.
Please do not cite values from these calculations in academic work without first inspecting and verifying the results in the run logs yourself.

Llama 2 70B

[
    {
        "model": "llama2-70b-4096",
        "base_url": "https://api.groq.com/openai/v1",
        "api_type": "openai",
        "api_key": "GROQ_API_KEY"
    }
]
(autogen) ➜  HumanEval autogenbench tabulate Results/human_eval_TwoAgents
Using tabulation method defined in '/home/Ubuntu/autogen/HumanEval/Scripts/custom_tabulate.py'

                 Trial 0    Trial 1    Trial 2
Task Id          Success    Success    Success
-------------  ---------  ---------  ---------
HumanEval_107       True       True       True
HumanEval_22        True       True       True
HumanEval_43        True       True       True
HumanEval_88       False      False      False
HumanEval_14       False      False      False
HumanEval_157      False      False      False
HumanEval_141      False      False      False
HumanEval_57       False      False      False
HumanEval_154       True       True       True
HumanEval_153       True       True       True
HumanEval_93       False      False      False
HumanEval_137      False      False      False
HumanEval_143      False      False      False
HumanEval_13       False      False      False
HumanEval_49        True       True       True
HumanEval_95       False      False      False
-------------  ---------  ---------  ---------
Successes              6          6          6
Failures              10         10         10
Missing                0          0          0
Total                 16         16         16

CAUTION: 'autogenbench tabulate' is in early preview and is not thoroughly tested.
Please do not cite values from these calculations in academic work without first inspecting and verifying the results in the run logs yourself.

Gemma 7b

(autogen) ➜  HumanEval autogenbench tabulate Results/human_eval_TwoAgents
Using tabulation method defined in '/home/Ubuntu/autogen/HumanEval/Scripts/custom_tabulate.py'

                 Trial 0    Trial 1    Trial 2
Task Id          Success    Success    Success
-------------  ---------  ---------  ---------
HumanEval_107      False      False      False
HumanEval_22        True      False       True
HumanEval_43       False      False      False
HumanEval_88       False      False      False
HumanEval_14       False      False      False
HumanEval_157      False      False      False
HumanEval_141      False      False      False
HumanEval_57       False      False      False
HumanEval_154      False      False      False
HumanEval_153      False      False      False
HumanEval_93       False      False      False
HumanEval_137      False      False      False
HumanEval_143      False      False      False
HumanEval_13       False      False       True
HumanEval_49       False      False      False
HumanEval_95       False      False       True
-------------  ---------  ---------  ---------
Successes              1          0          3
Failures              15         16         13
Missing                0          0          0
Total                 16         16         16

CAUTION: 'autogenbench tabulate' is in early preview and is not thoroughly tested.
Please do not cite values from these calculations in academic work without first inspecting and verifying the results in the run logs yourself.

OAI_CONFIG_LIST

[
    {
        "model": "gemma-7b-it",
        "base_url": "https://api.groq.com/openai/v1",
        "api_type": "openai",
        "api_key": "GROQ_API_KEY"
    }
]

Groq

OAI_CONFIG_LIST

[
    {
        "model": "mixtral-8x7b-32768",
        "base_url": "https://api.groq.com/openai/v1",
        "api_type": "openai",
        "api_key": "GROQ_API_KEY"
    }
]

Ollama

OAI_CONFIG_LIST

[
    {
        "model": "adrienbrault/nous-hermes2pro:Q5_K_S",
        "base_url": "http://X.X.X.X:11434/v1",
        "api_type": "openai",
        "api_key": "NULL"
    }
]

https://github.com/MervinPraison/AutoGen-HumanEval-Results/

Categories: Finetuning

Fine Tuning Doodles Cards on idefics-9b Vision Model

conda create -n finetuning python=3.11 -y
conda activate finetuning
export HF_HUB_ENABLE_HF_TRANSFER=1
export HF_TOKEN=xxxxxxxxxx 
pip install datasets transformers bitsandbytes sentencepiece accelerate loralib peft pillow torch torchvision hf_transfer
import torch, os, locale
import torchvision.transforms as transforms
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from PIL import Image
from transformers import IdeficsForVisionText2Text, AutoProcessor, Trainer, TrainingArguments, BitsAndBytesConfig

# 1. Setup: Initial configuration and model preparation
device = "cuda" if torch.cuda.is_available() else "cpu"
checkpoint = "HuggingFaceM4/idefics-9b"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    llm_int8_skip_modules=["lm_head", "embed_tokens"]
)
processor = AutoProcessor.from_pretrained(checkpoint)
model = IdeficsForVisionText2Text.from_pretrained(checkpoint, quantization_config=bnb_config, device_map="auto")
print(f"Model: {model}")

# 2. Before Fine Tuning
def do_inference(model, processor, prompts, max_new_tokens=50):
    tokenizer = processor.tokenizer
    bad_words = ["<image>", "<fake_token_around_image>"]
    if len(bad_words) > 0:
        bad_words_ids = tokenizer(bad_words, add_special_tokens=False).input_ids
    eos_token = "</s>"
    eos_token_id = tokenizer.convert_tokens_to_ids(eos_token)
    inputs = processor(prompts, return_tensors="pt").to(device)
    generated_ids = model.generate(
        **inputs,
        eos_token_id=[eos_token_id],
        bad_words_ids=bad_words_ids,
        max_new_tokens=max_new_tokens,
        num_beams=2,
        early_stopping=True
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print(generated_text)

print(f"### Before Fine Tuning ###\n")    
image_path = "https://storage.googleapis.com/image-praison/wp-content/2024/03/b82ccbbd-image-5.jpg"
prompts = [
    [image_path, "Question: What's in this picture? Answer:"] 
]
do_inference(model, processor, prompts, max_new_tokens=50)

# 3. Image preprocessing: Convert image to RGB and apply transformations
def convert_to_rgb(image):
  if image.mode == "RGB":
    return image
  image_rgba = image.convert("RGBA")
  background = Image.new("RGBA", image_rgba.size, (255,255,255))
  alpha_composite = Image.alpha_composite(background, image_rgba)
  alpha_composite = alpha_composite.convert("RGB")
  return alpha_composite

def ds_transforms(example_batch):
  image_size = processor.image_processor.image_size
  image_mean = processor.image_processor.image_mean
  image_std = processor.image_processor.image_std
  image_transform = transforms.Compose([
      convert_to_rgb,
      transforms.RandomResizedCrop((image_size, image_size), scale=(0.9, 1.0), interpolation=transforms.InterpolationMode.BICUBIC),
      transforms.ToTensor(),
      transforms.Normalize(mean=image_mean, std=image_std)
  ])
  prompts = []
  for i in range(len(example_batch['text'])):
      prompts.append(
          [
              example_batch['image'][i],
              f"Question: What's in this picture? Answer: This is {example_batch['text'][i]}.</s>" 
          ],
      )
  inputs = processor(prompts, transform=image_transform, return_tensors="pt").to(device)
  inputs["labels"] = inputs["input_ids"]
  return inputs

# 4. Dataset preparation: Loading and transforming the dataset for training
ds = load_dataset("julianmoraes/doodles-captions-BLIP")
ds = ds["train"].train_test_split(test_size=0.1)
train_ds = ds["train"]
eval_ds = ds["test"]
train_ds.set_transform(ds_transforms) 
eval_ds.set_transform(ds_transforms)

# 5. Training: Setting up model training parameters and LoRA configuration
model_name = checkpoint.split("/")[1]
config = LoraConfig(
    r = 16,
    lora_alpha = 32,
    target_modules = ["q_proj", "k_proj", "v_proj"],
    lora_dropout = 0.05,
    bias="none"
)
model = get_peft_model(model, config)
model.print_trainable_parameters()
training_args = TrainingArguments(
    output_dir = f"{model_name}-logo",
    learning_rate = 2e-4,
    fp16 = True,
    per_device_train_batch_size = 2,
    per_device_eval_batch_size = 2,
    gradient_accumulation_steps = 8,
    dataloader_pin_memory = False,
    save_total_limit = 3,
    evaluation_strategy ="steps",
    save_strategy = "steps",
    eval_steps = 10,
    save_steps = 25,
    max_steps = 25,
    logging_steps = 5,
    remove_unused_columns = False,
    push_to_hub=False,
    label_names= ["labels"],
    load_best_model_at_end = False,
    report_to = "none",
    optim = "paged_adamw_8bit",
)
trainer = Trainer(
    model = model,
    args = training_args,
    train_dataset = train_ds,
    eval_dataset = eval_ds
)
trainer.train()

print(f"### After Fine Tuning ###\n")
image_path = "https://storage.googleapis.com/image-praison/wp-content/2024/03/b82ccbbd-image-5.jpg"
prompts = [
    [image_path, "Question: What's in this picture? Answer:"] 
]
do_inference(model, processor, prompts, max_new_tokens=50)

# 6. Model saving and pushing to HuggingFace Hub
locale.getpreferredencoding = lambda: "UTF-8"
model = model.merge_and_unload()
model.save_pretrained(f"{model_name}-doodles")
tokenizer = processor.tokenizer
tokenizer.save_pretrained(f"{model_name}-doodles")
model.push_to_hub(f"{model_name}-doodles", use_temp_dir=False, token=os.getenv("HF_TOKEN"))
tokenizer.push_to_hub(f"{model_name}-doodles", use_temp_dir=False, token=os.getenv("HF_TOKEN"))
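
Once the push completes, the merged model can be loaded back like any other checkpoint. A minimal sketch, assuming the repository ends up under your Hugging Face account as idefics-9b-doodles (replace your-username; the processor is loaded from the base checkpoint, since only the tokenizer was pushed above):

from transformers import IdeficsForVisionText2Text, AutoProcessor

# hypothetical repo id; replace "your-username" with your actual Hugging Face username
model = IdeficsForVisionText2Text.from_pretrained("your-username/idefics-9b-doodles", device_map="auto")
processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics-9b")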
Categories: AI

GPU Cost Comparison

| Provider | GPU | # | VRAM (GB) | FP16 TFLOPS | FP8 TFLOPS | Bandwidth (GB/s) | W per Card | Price | Per GPU | Per FP16 GFLOP | Per FP8 GFLOP | BF16 | Supply |
|----------|-----|---|-----------|-------------|------------|------------------|------------|-------|---------|----------------|---------------|------|--------|
| Kaggle | Tesla T4 | 1 | 16 | 65 | | 320 | 70 | 0 | 0 | 0.000 | | N | 5 |
| Lambda | A6000 Ada | 4 | 48 | 364.25 | 728.5 | 960 | 300 | 3.2 | 0.8 | 2.196 | 1.098 | Y | 1 |
| Lambda | H100 PCIe | 1 | 80 | 989 | 1979 | 2000 | 350 | 2.49 | 2.49 | 2.518 | 1.258 | Y | 1 |
| Runpod | A6000 Ada | 8 | 48 | 364.25 | 728.5 | 960 | 300 | 9.12 | 1.14 | 3.130 | 1.565 | Y | 2 |
| Colab | Tesla T4 | 1 | 16 | 65 | | 320 | 70 | 0.205 | 0.205 | 3.154 | | N | 5 |
| Lambda | H100 SXM | 8 | 80 | 989 | 1979 | 3350 | 700 | 27.92 | 3.49 | 3.529 | 1.764 | Y | 0 |
| Runpod | L4 | 8 | 24 | 121 | 242.5 | 300 | 72 | 3.52 | 0.44 | 3.636 | 1.814 | Y | 0 |
| Runpod | RTX A4500 | 4 | 20 | 94.6 | | 640 | 200 | 1.44 | 0.36 | 3.805 | | Y | 2 |
| Runpod | H100 PCIe | 8 | 80 | 989 | 1979 | 3350 | 700 | 31.12 | 3.89 | 3.933 | 1.966 | Y | 3 |
| Runpod | RTX A5000 | 8 | 24 | 111.1 | | 768 | 230 | 3.52 | 0.44 | 3.960 | | Y | 3 |
| Runpod | L40S | 8 | 48 | 362.05 | 733 | 864 | 350 | 11.92 | 1.49 | 4.115 | 2.033 | Y | 2 |
| Lambda | A100 PCIe | 4 | 40 | 312 | | 1555 | 250 | 5.16 | 1.29 | 4.135 | | Y | 0 |
| Colab | A100 | 1 | 40 | 312 | | 1555 | 250 | 1.308 | 1.308 | 4.192 | | Y | 2 |
| Runpod | RTX A4000 | 8 | 16 | 76.7 | | 448 | 140 | 2.72 | 0.34 | 4.433 | | Y | 2 |
| Runpod | RTX 4090 | 8 | 24 | 165.2 | 330.3 | 1008 | 450 | 5.92 | 0.74 | 4.479 | 2.240 | Y | 3 |
| Runpod | A40 | 8 | 48 | 149.7 | | 696 | 300 | 5.52 | 0.69 | 4.609 | | Y | 3 |
| Runpod | H100 SXM | 8 | 80 | 989 | 1979 | 3350 | 700 | 37.52 | 4.69 | 4.742 | 2.370 | Y | 3 |
| Runpod | A4000 Ada | 4 | 20 | 82 | 163.8 | 360 | 130 | 1.56 | 0.39 | 4.756 | 2.381 | Y | 2 |
| Colab | V100 | 1 | 16 | 112 | | 900 | 300 | 0.545 | 0.545 | 4.866 | | N | 5 |
| Lambda | V100 | 8 | 16 | 112 | | 900 | 300 | 4.4 | 0.55 | 4.911 | | N | 1 |
| Runpod | RTX A6000 | 8 | 48 | 154.85 | | 768 | 300 | 6.32 | 0.79 | 5.102 | | Y | 3 |
| Google | Tesla T4 | 4 | 16 | 65 | | 320 | 70 | 1.4 | 0.35 | 5.385 | | N | 5 |
| Lambda | A100 SXM | 8 | 80 | 312 | | 2039 | 400 | 14.32 | 1.79 | 5.737 | | Y | 0 |
| Lambda | A10 | 1 | 24 | 125 | 250 | 600 | 150 | 0.75 | 0.75 | 6.000 | 3.000 | Y | 3 |
| Runpod | A100 PCIe | 8 | 80 | 312 | | 1555 | 250 | 15.12 | 1.89 | 6.058 | | Y | 3 |
| Runpod | RTX 3090 | 8 | 24 | 71 | | 936 | 350 | 3.52 | 0.44 | 6.197 | | Y | 1 |
| Runpod | L40 | 8 | 48 | 181.05 | 362 | 864 | 300 | 9.12 | 1.14 | 6.297 | 3.149 | Y | 2 |
| Runpod | A100 SXM | 8 | 80 | 312 | | 2039 | 400 | 18.32 | 2.29 | 7.340 | | Y | 2 |
| Google | L4 | 8 | 24 | 121 | 242.5 | 300 | 72 | 8.003331 | 1.000416375 | 8.268 | 4.125 | Y | 4 |
| Google | A100 PCIe | 8 | 40 | 312 | | 1555 | 250 | 29.387816 | 3.673477 | 11.774 | | Y | 4 |
| AWS | A100 PCIe | 8 | 40 | 312 | | 1555 | 250 | 32.77 | 4.09625 | 13.129 | | Y | 4 |
| AWS | Tesla T4 | 8 | 16 | 65 | | 320 | 70 | 7.824 | 0.978 | 15.046 | | N | 5 |
| Google | A100 SXM | 8 | 80 | 312 | | 2039 | 400 | 40.5504 | 5.0688 | 16.246 | | Y | 4 |
| AWS | A100 SXM | 8 | 80 | 312 | | 2039 | 400 | 40.96 | 5.12 | 16.410 | | Y | 4 |
| Google | V100 | 8 | 16 | 112 | | 900 | 300 | 19.84 | 2.48 | 22.143 | | N | 5 |
| AWS | V100 | 8 | 16 | 112 | | 900 | 300 | 24.48 | 3.06 | 27.321 | | N | 5 |

Source: https://docs.google.com/spreadsheets/d/1tRbUj8qjsnZdUJOEXJFRLFSOwOWKFaA3hgRC1XWje-w/edit#gid=0
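
The two "Per ... GFLOP" columns appear to be derived from the other columns: the per-GPU price divided by the card's TFLOPS rating, scaled by 1000. A minimal sketch of that calculation for one row (Lambda A6000 Ada), assuming Price is the hourly price for the whole instance:

# hedged sketch: reproduce the derived columns for the Lambda A6000 Ada row
price = 3.2           # instance price per hour
num_gpus = 4
fp16_tflops = 364.25  # per card
fp8_tflops = 728.5    # per card

per_gpu = price / num_gpus                 # 0.8
per_fp16 = per_gpu / fp16_tflops * 1000    # ~2.196, matches the table
per_fp8 = per_gpu / fp8_tflops * 1000      # ~1.098, matches the table
print(round(per_gpu, 3), round(per_fp16, 3), round(per_fp8, 3))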

Categories: Finetuning

Fine Tuning Pokemon Cards on idefics-9b Vision Model

!pip install -q datasets
!pip install -q git+https://github.com/huggingface/transformers
!pip install -q bitsandbytes sentencepiece accelerate loralib
!pip install -q -U git+https://github.com/huggingface/peft.git
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from PIL import Image
from transformers import IdeficsForVisionText2Text, AutoProcessor, Trainer, TrainingArguments, BitsAndBytesConfig
device = "cuda" if torch.cuda.is_available() else "cpu"
checkpoint = "HuggingFaceM4/idefics-9b"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    llm_int8_skip_modules=["lm_head", "embed_tokens"]
)
processor = AutoProcessor.from_pretrained(checkpoint)
model = IdeficsForVisionText2Text.from_pretrained(checkpoint, quantization_config=bnb_config, device_map="auto")
model
# Inference
def do_inference(model, processor, prompts, max_new_tokens=50):
    tokenizer = processor.tokenizer
    bad_words = ["<image>", "<fake_token_around_image>"]
    if len(bad_words) > 0:
        bad_words_ids = tokenizer(bad_words, add_special_tokens=False).input_ids
    eos_token = "</s>"
    eos_token_id = tokenizer.convert_tokens_to_ids(eos_token)

    inputs = processor(prompts, return_tensors="pt").to(device)
    generated_ids = model.generate(
        **inputs,
        eos_token_id=[eos_token_id],
        bad_words_ids=bad_words_ids,
        max_new_tokens=max_new_tokens,
        early_stopping=True
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print(generated_text)
import torchvision.transforms as transforms
url = "https://hips.hearstapps.com/hmg-prod/images/cute-photos-of-cats-in-grass-1593184777.jpg"
prompts = [
    url,
    "Question: What's on the picture? Answer:",
]
##preprocessing
def convert_to_rgb(image):
  if image.mode == "RGB":
    return image

  image_rgba = image.convert("RGBA")
  background = Image.new("RGBA", image_rgba.size, (255,255,255))
  alpha_composite = Image.alpha_composite(background, image_rgba)
  alpha_composite = alpha_composite.convert("RGB")
  return alpha_composite

def ds_transforms(example_batch):
  image_size = processor.image_processor.image_size
  image_mean = processor.image_processor.image_mean
  image_std = processor.image_processor.image_std

  image_transform = transforms.Compose([
      convert_to_rgb,
      transforms.RandomResizedCrop((image_size, image_size), scale=(0.9, 1.0), interpolation=transforms.InterpolationMode.BICUBIC),
      transforms.ToTensor(),
      transforms.Normalize(mean=image_mean, std=image_std)
  ])

  prompts = []
  for i in range(len(example_batch['caption'])):
    caption = example_batch['caption'][i].split(".")[0]
    prompts.append(
        [
            example_batch['image_url'][i],
            f"Question: What's on the picture? Answer: This is {example_batch['name']}. {caption}</s>",
        ],
    )
  inputs = processor(prompts, transform=image_transform, return_tensors="pt").to(device)
  inputs["labels"] = inputs["input_ids"]
  return inputs
#Load and prepare the data
ds = load_dataset("TheFusion21/PokemonCards")
ds = ds["train"].train_test_split(test_size=0.002)
train_ds = ds["train"]
eval_ds = ds["test"]
train_ds.set_transform(ds_transforms)
eval_ds.set_transform(ds_transforms)
model_name = checkpoint.split("/")[1]
config = LoraConfig(
    r = 16,
    lora_alpha = 32,
    target_modules = ["q_proj", "k_proj", "v_proj"],
    lora_dropout = 0.05,
    bias="none"
)
model = get_peft_model(model, config)
model.print_trainable_parameters()
training_args = TrainingArguments(
    output_dir = f"{model_name}-PokemonCards",
    learning_rate = 2e-4,
    fp16 = True,
    per_device_train_batch_size = 2,
    per_device_eval_batch_size = 2,
    gradient_accumulation_steps = 8,
    dataloader_pin_memory = False,
    save_total_limit = 3,
    evaluation_strategy = "steps",
    save_strategy = "steps",
    eval_steps = 10,
    save_steps = 25,
    max_steps = 25,
    logging_steps = 5,
    remove_unused_columns = False,
    push_to_hub=False,
    label_names = ["labels"],
    load_best_model_at_end = False,
    report_to = "none",
    optim = "paged_adamw_8bit",
)
trainer = Trainer(
    model = model,
    args = training_args,
    train_dataset = train_ds,
    eval_dataset = eval_ds
)
trainer.train()
url = "https://images.pokemontcg.io/pop6/2_hires.png"
prompts = [
    url,
    "Question: What's on the picture? Answer:",
]
do_inference(model, processor, prompts, max_new_tokens=100)
# Force UTF-8 as the preferred encoding (a common workaround for locale errors in Colab notebooks)
import locale
locale.getpreferredencoding = lambda: "UTF-8"
## Push to hub
model = model.merge_and_unload()
model.push_to_hub(f"{model_name}-PokemonCards", use_temp_dir=False, private=False, token="API_TOKEN")
Categories
Finetuning

Merge Base Model and PEFT Adapter and Push it to HF Hub

# Example usage:
# python merge_peft.py --base_model=meta-llama/Llama-2-7b-hf --peft_model=./qlora-out --hub_id=alpaca-qlora

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

import argparse

def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--base_model", type=str)
    parser.add_argument("--peft_model", type=str)
    parser.add_argument("--hub_id", type=str)

    return parser.parse_args()

def main():
    args = get_args()

    print(f"[1/5] Loading base model: {args.base_model}")
    base_model = AutoModelForCausalLM.from_pretrained(
        args.base_model,
        return_dict=True,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(args.base_model)

    print(f"[2/5] Loading adapter: {args.peft_model}")
    model = PeftModel.from_pretrained(base_model, args.peft_model, device_map="auto")
    
    print("[3/5] Merge base model and adapter")
    model = model.merge_and_unload()
    
    print(f"[4/5] Saving model and tokenizer in {args.hub_id}")
    model.save_pretrained(f"{args.hub_id}")
    tokenizer.save_pretrained(f"{args.hub_id}")

    print(f"[5/5] Uploading to Hugging Face Hub: {args.hub_id}")
    model.push_to_hub(f"{args.hub_id}", use_temp_dir=False)
    tokenizer.push_to_hub(f"{args.hub_id}", use_temp_dir=False)
    
    print("Merged model uploaded to Hugging Face Hub!")

if __name__ == "__main__":
    main()
Categories
Machine Learning

Machine Learning Concepts and Techniques

Term | Definition | Example Outputs | Input Data | Goal | Model Complexity
Large Language Models | Models trained on large text datasets for language tasks. | Textual output, translations. | Text | Understanding/generating language | Very high
Regression | Predicting numerical values. | House prices, temperatures. | Numeric, categorical | Quantitative predictions | Varies
Classification | Predicting categories or classes. | Spam or not spam, image categories. | Numeric, categorical | Categorical predictions | Varies
Clustering | Grouping data points based on similarity. | Customer segments, data categorization. | Numeric, categorical | Data segmentation | Moderate to high
Dimensionality Reduction | Reducing the number of random variables under consideration. | Reduced feature set for visualization. | Numeric, categorical | Simplify data, improve efficiency | Moderate
Reinforcement Learning | Learning to make decisions through rewards. | Game playing, autonomous vehicles. | Environment states | Optimize decision-making | High
Anomaly Detection | Identifying unusual data points. | Fraud detection, system health monitoring. | Numeric, categorical | Identify outliers | Moderate to high
Recommendation Systems | Suggesting products or services. | Movie/music recommendations, product suggestions. | User-item interactions | Personalize suggestions | High
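
As a rough illustration of three of the rows above, here is a minimal scikit-learn sketch contrasting regression, classification and clustering; the toy data and the choice of estimators are arbitrary assumptions for illustration, not a recommendation.

# Toy sketch: regression, classification and clustering side by side (scikit-learn assumed)
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.cluster import KMeans

X = np.array([[1.0], [2.0], [3.0], [4.0]])  # one numeric feature

# Regression: predict a continuous number
reg = LinearRegression().fit(X, np.array([1.1, 1.9, 3.2, 3.9]))
print("regression prediction:", reg.predict([[5.0]]))

# Classification: predict a discrete class
clf = LogisticRegression().fit(X, np.array([0, 0, 1, 1]))
print("classification prediction:", clf.predict([[2.5]]))

# Clustering: group points without any labels
km = KMeans(n_clusters=2, n_init=10).fit(X)
print("cluster assignments:", km.labels_)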

Embeddings and re-ranking typically fall under multiple categories, depending on their application; a minimal code sketch follows the list below:

  • Embeddings: Dimensionality Reduction, sometimes Clustering.
    • Purpose: Convert high-dimensional data (like text) into lower-dimensional vectors to capture semantic meaning or relationships.
    • Applications: Used in various ML tasks, including classification, recommendation systems, and natural language processing.
  • Re-ranking: Often associated with Recommendation Systems and Classification.
    • Purpose: Adjust or refine the ordering of a list of items based on certain criteria or additional models.
    • Applications: Improving the relevance of search engine results, enhancing recommendation systems.
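
To make the distinction concrete, the sketch below retrieves documents by cosine similarity over embeddings and then re-ranks the candidates with a second scorer. The 3-dimensional vectors and the keyword-overlap "reranker" are made-up stand-ins for a real embedding model and reranking model.

# Toy sketch: embedding-based retrieval followed by re-ranking (numpy only, toy vectors)
import numpy as np

docs = ["the cat sat on the mat", "dogs love long walks", "cats purr when happy"]
doc_embeddings = np.array([[0.9, 0.1, 0.0], [0.1, 0.9, 0.0], [0.8, 0.2, 0.1]])  # hypothetical embeddings
query = "tell me about cats"
query_embedding = np.array([0.85, 0.15, 0.05])  # hypothetical embedding of the query

def cosine(a, b):
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# Step 1: retrieve the top 2 documents by embedding similarity
scores = [cosine(query_embedding, e) for e in doc_embeddings]
retrieved = sorted(range(len(docs)), key=lambda i: scores[i], reverse=True)[:2]

# Step 2: re-rank the retrieved candidates with a second, finer-grained scorer (here: word overlap)
def rerank_score(q, d):
    return len(set(q.split()) & set(d.split()))

reranked = sorted(retrieved, key=lambda i: rerank_score(query, docs[i]), reverse=True)
print("retrieved:", [docs[i] for i in retrieved])
print("re-ranked:", [docs[i] for i in reranked])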

Others

  1. Feature Engineering: The process of using domain knowledge to extract features from raw data that make machine learning algorithms work.
  2. Hyperparameter Tuning: The process of optimizing the parameters that govern the training process of a machine learning model.
  3. Transfer Learning: Leveraging a pre-trained model on a new, related problem (see the sketch after this list).
  4. Ensemble Methods: Techniques that combine the predictions from multiple machine learning algorithms to make more accurate predictions than any individual model.
  5. Generative Models: Models that can generate new data instances that resemble your training data, such as GANs (Generative Adversarial Networks).
  6. Sequence Modeling: Techniques used for predicting the next item in a sequence, applicable in time series analysis, natural language processing, etc.
  7. Attention Mechanisms: Components that allow models to focus on specific parts of the input for generating a particular output, crucial in advanced language models.
  8. Graph Neural Networks (GNNs): Neural networks designed to directly work with data that is structured as graphs, useful in social network analysis, recommendation systems, etc.
  9. Self-supervised Learning: A type of learning where the data itself provides supervision, often used in situations where labeled data is scarce.
  10. Multi-task Learning: An approach where a single model is trained on multiple related tasks, which can improve the model’s performance on individual tasks.
  11. Few-shot Learning: Techniques that aim to train models on a very small amount of data.
  12. Reinforcement Learning: Learning to make decisions by taking actions in an environment to maximize some notion of cumulative reward.
  13. Natural Language Understanding (NLU): Techniques for machines to understand and interpret human language, beyond just generating or processing text.
  14. Speech Recognition: Converting spoken words into text, a specific application of machine learning models tailored for audio data.
  15. Computer Vision: Techniques for machines to interpret and understand the visual world, involving tasks like image recognition, object detection, and more.
  16. Bias and Fairness in AI: Approaches to ensure machine learning models do not perpetuate or amplify bias, and are fair in their decisions.
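
As a concrete example of transfer learning (item 3 above), here is a minimal PyTorch/torchvision sketch: a ResNet-18 pre-trained on ImageNet is frozen and only a new classification head is trained. The 10-class head and the random tensors are placeholders for a real dataset.

# Transfer-learning sketch: freeze a pre-trained backbone, train only a new head (torch/torchvision assumed)
import torch
import torch.nn as nn
from torchvision import models

model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)  # pre-trained on ImageNet

# Freeze the pre-trained backbone
for param in model.parameters():
    param.requires_grad = False

# Replace the final layer with a new head for a hypothetical 10-class problem
model.fc = nn.Linear(model.fc.in_features, 10)

# Only the new head's parameters are optimised
optimizer = torch.optim.Adam(model.fc.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# One dummy training step on random data, just to show the shape of the loop
images = torch.randn(4, 3, 224, 224)
labels = torch.randint(0, 10, (4,))
optimizer.zero_grad()
loss = criterion(model(images), labels)
loss.backward()
optimizer.step()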
Categories
Embedding

Cohere RAG with Reranking

pip install cohere wikipedia langchain-text-splitters numpy
import cohere, os
import wikipedia

co = cohere.Client(os.environ['COHERE_API_KEY'])

# let's get the wikipedia article about Dune Part Two
article = wikipedia.page('Dune Part Two')
text = article.content
print(f"The text has roughly {len(text.split())} words.")

# 1. Indexing and given a user query, retrieve the relevant chunks from the index
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=50,
    length_function=len,
    is_separator_regex=False,
)

chunks_ = text_splitter.create_documents([text])
chunks = [c.page_content for c in chunks_]
print(f"The text has been broken down in {len(chunks)} chunks.")

# Because the texts being embedded are the chunks we are searching over, we set the input type as search_document
model="embed-english-v3.0"
response = co.embed(
    texts= chunks,
    model=model,
    input_type="search_document",
    embedding_types=['float']
)
embeddings = response.embeddings.float
print(f"We just computed {len(embeddings)} embeddings.")

# ### Store the embeddings in a vector database
# We use the simplest possible vector database: a Python dictionary of `np.array()` values.
import numpy as np
vector_database = {i: np.array(embedding) for i, embedding in enumerate(embeddings)}

query = "Name everyone involved in writing the script, directing, and producing 'Dune: Part Two'?"

# ### Embed the user question
# Because the text being embedded is the search query, we set the input type as search_query
response = co.embed(
    texts=[query],
    model=model,
    input_type="search_query",
    embedding_types=['float']
)
query_embedding = response.embeddings.float[0]
print("query_embedding: ", query_embedding)

# ### Retrieve the most relevant chunks from the vector database (cosine similarity)
def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

# Calculate similarity between the user question & each chunk's embedding
similarities = [cosine_similarity(query_embedding, embedding) for embedding in embeddings]
print("similarity scores: ", similarities)

# Get indices of the top 10 most similar chunks
sorted_indices = np.argsort(similarities)[::-1]

# Keep only the top 10 indices
top_indices = sorted_indices[:10]
print("Here are the indices of the top 10 chunks after retrieval: ", top_indices)

# Retrieve the top 10 most similar chunks
top_chunks_after_retrieval = [chunks[i] for i in top_indices]
print("Here are the top 10 chunks after retrieval: ")
for t in top_chunks_after_retrieval:
    print("== " + t)

# 2. Rerank the chunks retrieved from the vector database
# We rerank the 10 chunks retrieved from the vector database. 
# Reranking boosts retrieval accuracy.
# Reranking lets us go from 10 chunks retrieved from the vector database, to the 3 most relevant chunks.

response = co.rerank(
    query=query,
    documents=top_chunks_after_retrieval,
    top_n=3,
    model="rerank-english-v2.0",
)

top_chunks_after_rerank = [result.document['text'] for result in response]
print("Here are the top 3 chunks after rerank: ")
for t in top_chunks_after_rerank:
    print("== " + t)

# 3. Generate the model's final answer, given the retrieved and reranked chunks

# preamble containing instructions about the task and the desired style for the output.
preamble = """
## Task & Context
You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging.

## Style Guide
Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.
"""

# retrieved documents
documents = [
    {"title": "chunk 0", "snippet": top_chunks_after_rerank[0]},
    {"title": "chunk 1", "snippet": top_chunks_after_rerank[1]},
    {"title": "chunk 2", "snippet": top_chunks_after_rerank[2]},
  ]

# get model response
response = co.chat(
  message=query,
  documents=documents,
  preamble=preamble,
  model="command-r",
  temperature=0.3
)

print("Final answer:")
print(response.text)

# 4. Citations
print("Citations that support the final answer:")
for cite in response.citations:
  print(cite)

def insert_citations_in_order(text, citations):
    """
    A helper function to pretty print citations.
    """
    offset = 0
    document_id_to_number = {}
    citation_number = 0
    modified_citations = []

    # Process citations, assigning numbers based on unique document_ids
    for citation in citations:
        citation_numbers = []
        for document_id in sorted(citation["document_ids"]):
            if document_id not in document_id_to_number:
                citation_number += 1  # Increment for a new document_id
                document_id_to_number[document_id] = citation_number
            citation_numbers.append(document_id_to_number[document_id])

        # Adjust start/end with offset
        start, end = citation['start'] + offset, citation['end'] + offset
        placeholder = ''.join([f'[{number}]' for number in citation_numbers])
        # Bold the cited text and append the placeholder
        modification = f'**{text[start:end]}**{placeholder}'
        # Replace the cited text with its bolded version + placeholder
        text = text[:start] + modification + text[end:]
        # Update the offset for subsequent replacements
        offset += len(modification) - (end - start)

    # List each cited document once at the bottom, ordered by citation number
    unique_citations = {number: doc_id for doc_id, number in document_id_to_number.items()}
    citation_list = '\n'.join([f'[{number}] source: "{documents[number - 1]["snippet"]}"' for number in sorted(unique_citations)])
    text_with_citations = f'{text}\n\n{citation_list}'
    return text_with_citations

print(insert_citations_in_order(response.text, response.citations))