Delete whalecore
- whalecore/__init__.py +0 -0
- whalecore/agents.py +0 -36
- whalecore/parser.py +0 -49
- whalecore/rag.py +0 -37
whalecore/__init__.py
DELETED
File without changes
whalecore/agents.py
DELETED
@@ -1,36 +0,0 @@
import yaml

class Agent:
    def __init__(self, name, persona, instructions):
        self.name = name
        self.persona = persona
        self.instructions = instructions

    def chat(self, message):
        # Placeholder logic — replace with real LLM call later
        return f"🧠 {self.name} says:\n{self.instructions}\n\n{self.persona}\n\nYou said: {message[:260]}..."

def load_agents(config_path="config.yaml"):
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)

    assert isinstance(config, dict), "YAML must contain a top-level 'agents:' key"
    assert 'agents' in config, "Missing 'agents' key in YAML file"

    print("🧠 YAML loaded successfully:", config)

    agents = []
    for agent_conf in config['agents']:
        agent = Agent(
            name=agent_conf['name'],
            persona=agent_conf['persona'],
            instructions=agent_conf['instructions']
        )
        agents.append(agent)
    return agents

def run_agents_on_text(agent_list, text):
    results = {}
    for agent in agent_list:
        results[agent.name] = agent.chat(text)
    return results
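For context on how the deleted agents module fit together, here is a minimal, hedged usage sketch. It assumes a config.yaml whose top-level 'agents:' list carries name, persona and instructions keys, which is what load_agents() reads; the agent values below are illustrative placeholders, not taken from the Space.

import yaml
from whalecore.agents import load_agents, run_agents_on_text

# Illustrative config matching the keys load_agents() expects.
sample_config = {
    "agents": [
        {
            "name": "Summarizer",
            "persona": "You are a concise analyst.",
            "instructions": "Summarize the input in three bullet points.",
        }
    ]
}

with open("config.yaml", "w") as f:
    yaml.safe_dump(sample_config, f)

agents = load_agents("config.yaml")
replies = run_agents_on_text(agents, "Whales are the largest animals on Earth.")
for name, reply in replies.items():
    print(name, "->", reply)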
whalecore/parser.py
DELETED
@@ -1,49 +0,0 @@
import os
import PyPDF2
import whisper
from pydub import AudioSegment
from sentence_transformers import SentenceTransformer

import warnings
warnings.filterwarnings(
    "ignore",
    category=FutureWarning,
    message="`clean_up_tokenization_spaces` was not set.*"
)
model = SentenceTransformer('all-MiniLM-L6-v2')

def parse_pdf(filepath):
    text = ""
    with open(filepath, 'rb') as f:
        reader = PyPDF2.PdfReader(f)
        for page in reader.pages:
            text += page.extract_text() + "\n"
    return text

def parse_audio(filepath):
    model = whisper.load_model("base")
    result = model.transcribe(filepath)
    return result['text']

def parse_text(filepath):
    with open(filepath, 'r') as f:
        return f.read()

def parse_file(filepath):
    if filepath.endswith('.pdf'):
        return parse_pdf(filepath)
    elif filepath.endswith(('.mp3', '.wav', '.m4a')):
        return parse_audio(filepath)
    elif filepath.endswith('.txt'):
        return parse_text(filepath)
    else:
        raise ValueError(f"Unsupported file type: {filepath}")

def chunk_text(text, chunk_size=300):
    words = text.split()
    return [' '.join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]

def chunk_and_embed(text):
    chunks = chunk_text(text)
    embeddings = model.encode(chunks).tolist()
    return list(zip(chunks, embeddings))
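A short, hedged sketch of how the deleted parser module was presumably used. It assumes the dependencies it imports (PyPDF2, openai-whisper, pydub, sentence-transformers) are installed; the file name notes.txt is a placeholder, and parse_file() dispatches purely on the file extension.

from whalecore.parser import parse_file, chunk_and_embed

# notes.txt is a placeholder; .pdf and .mp3/.wav/.m4a paths route to the PDF and audio parsers.
text = parse_file("notes.txt")
pairs = chunk_and_embed(text)  # list of (chunk, embedding) tuples; all-MiniLM-L6-v2 produces 384-dim vectors
print(f"{len(pairs)} chunks embedded, first vector has {len(pairs[0][1])} dimensions")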
whalecore/rag.py
DELETED
@@ -1,37 +0,0 @@
from sentence_transformers import SentenceTransformer
from pymongo import MongoClient
import numpy as np

model = SentenceTransformer('all-MiniLM-L6-v2')
client = MongoClient()
db = client['huggingwhale']
collection = db['docs']

def chunk_text(text, chunk_size=300):
    words = text.split()
    return [' '.join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]

def embed_chunks(chunks):
    return model.encode(chunks).tolist()

def store_embeddings(chunks, embeddings):
    docs = [
        {"chunk": chunk, "embedding": emb}
        for chunk, emb in zip(chunks, embeddings)
    ]
    collection.insert_many(docs)

def query_rag(question, top_k=3):
    question_vec = model.encode([question])[0]
    results = collection.aggregate([
        {
            "$vectorSearch": {
                "index": "default",
                "path": "embedding",
                "queryVector": question_vec,
                "numCandidates": 100,
                "limit": top_k
            }
        }
    ])
    return [doc['chunk'] for doc in results]
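Two caveats about the deleted rag module, plus a hedged usage sketch. The $vectorSearch aggregation stage only runs against MongoDB Atlas collections with a vector search index (here named "default" on the "embedding" path), not against a bare local mongod, and queryVector must be BSON-serializable, so the NumPy array returned by model.encode() typically needs a .tolist() conversion before the query is sent. The document text and question below are illustrative placeholders.

from whalecore.rag import chunk_text, embed_chunks, store_embeddings, query_rag

# Illustrative document; store_embeddings() writes {"chunk", "embedding"} docs into huggingwhale.docs.
text = "Blue whales can reach about 30 metres in length and feed mainly on krill."
chunks = chunk_text(text)
store_embeddings(chunks, embed_chunks(chunks))

# Requires an Atlas vector search index named "default" on the "embedding" field.
print(query_rag("How large do blue whales get?", top_k=3))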